Commit 2aa6ac03, authored by Florent Revest, committed by Will Deacon
Browse files

arm64: ftrace: Add direct call support



This builds on the CALL_OPS work, which extends the ftrace patchsite
on arm64 with an ops pointer usable by the ftrace trampoline.

This ops pointer is valid at all times: it points either to
ftrace_list_ops or to the single ops which should be called from that
patchsite.

There are a few cases to distinguish:
- If a direct call ops is the only one tracing a function:
  - If the direct called trampoline is within the reach of a BL
    instruction
     -> the ftrace patchsite jumps to the trampoline
  - Else
     -> the ftrace patchsite jumps to the ftrace_caller trampoline which
        reads the ops pointer in the patchsite and jumps to the direct
        call address stored in the ops
- Else
  -> the ftrace patchsite jumps to the ftrace_caller trampoline and its
     ops literal points to ftrace_list_ops so it iterates over all
     registered ftrace ops, including the direct call ops and calls its
     call_direct_funcs handler which stores the direct called
     trampoline's address in the ftrace_regs and the ftrace_caller
     trampoline will return to that address instead of returning to the
     traced function

Signed-off-by: Florent Revest <revest@chromium.org>
Co-developed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230405180250.2046566-2-revest@chromium.org


Signed-off-by: Will Deacon <will@kernel.org>
parent f89b30b8
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -185,6 +185,10 @@ config ARM64
	select HAVE_DEBUG_KMEMLEAK
	select HAVE_DMA_CONTIGUOUS
	select HAVE_DYNAMIC_FTRACE
	select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
		if $(cc-option,-fpatchable-function-entry=2)
	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \
		if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS
	select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
		if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \
		    !CC_OPTIMIZE_FOR_SIZE)
+22 −0
Original line number Diff line number Diff line
@@ -70,10 +70,19 @@ struct ftrace_ops;

#define arch_ftrace_get_regs(regs) NULL

/*
 * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct
 * stack alignment
 */
struct ftrace_regs {
	/* x0 - x8 */
	unsigned long regs[9];

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	unsigned long direct_tramp;
#else
	unsigned long __unused;
#endif

	unsigned long fp;
	unsigned long lr;
@@ -136,6 +145,19 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
		       struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
 * arch_ftrace_set_direct_caller - record a direct trampoline in @fregs
 * @fregs: ftrace_regs whose direct_tramp field is written
 * @addr:  address of the direct trampoline
 *
 * The ftrace trampoline will return to @addr instead of the instrumented
 * function.
 */
static inline void
arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
{
	fregs->direct_tramp = addr;
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

#endif

#define ftrace_return_address(n) return_address(n)
+6 −0
Original line number Diff line number Diff line
@@ -93,6 +93,9 @@ int main(void)
  DEFINE(FREGS_LR,		offsetof(struct ftrace_regs, lr));
  DEFINE(FREGS_SP,		offsetof(struct ftrace_regs, sp));
  DEFINE(FREGS_PC,		offsetof(struct ftrace_regs, pc));
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  DEFINE(FREGS_DIRECT_TRAMP,	offsetof(struct ftrace_regs, direct_tramp));
#endif
  DEFINE(FREGS_SIZE,		sizeof(struct ftrace_regs));
  BLANK();
#endif
@@ -197,6 +200,9 @@ int main(void)
#endif
#ifdef CONFIG_FUNCTION_TRACER
  DEFINE(FTRACE_OPS_FUNC,		offsetof(struct ftrace_ops, func));
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  DEFINE(FTRACE_OPS_DIRECT_CALL,	offsetof(struct ftrace_ops, direct_call));
#endif
#endif
  return 0;
}
+75 −15
Original line number Diff line number Diff line
@@ -36,6 +36,31 @@
SYM_CODE_START(ftrace_caller)
	bti	c

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
	/*
	 * The literal pointer to the ops is at an 8-byte aligned boundary
	 * which is either 12 or 16 bytes before the BL instruction in the call
	 * site. See ftrace_call_adjust() for details.
	 *
	 * Therefore here the LR points at `literal + 16` or `literal + 20`,
	 * and we can find the address of the literal in either case by
	 * aligning to an 8-byte boundary and subtracting 16. We do the
	 * alignment first as this allows us to fold the subtraction into the
	 * LDR.
	 */
	bic	x11, x30, 0x7
	ldr	x11, [x11, #-(4 * AARCH64_INSN_SIZE)]		// op

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	/*
	 * If the op has a direct call, handle it immediately without
	 * saving/restoring registers.
	 */
	ldr	x17, [x11, #FTRACE_OPS_DIRECT_CALL]		// op->direct_call
	cbnz	x17, ftrace_caller_direct
#endif
#endif

	/* Save original SP */
	mov	x10, sp

@@ -49,6 +74,10 @@ SYM_CODE_START(ftrace_caller)
	stp	x6, x7, [sp, #FREGS_X6]
	str	x8,     [sp, #FREGS_X8]

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	str	xzr, [sp, #FREGS_DIRECT_TRAMP]
#endif

	/* Save the callsite's FP, LR, SP */
	str	x29, [sp, #FREGS_FP]
	str	x9,  [sp, #FREGS_LR]
@@ -71,20 +100,7 @@ SYM_CODE_START(ftrace_caller)
	mov	x3, sp					// regs

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
	/*
	 * The literal pointer to the ops is at an 8-byte aligned boundary
	 * which is either 12 or 16 bytes before the BL instruction in the call
	 * site. See ftrace_call_adjust() for details.
	 *
	 * Therefore here the LR points at `literal + 16` or `literal + 20`,
	 * and we can find the address of the literal in either case by
	 * aligning to an 8-byte boundary and subtracting 16. We do the
	 * alignment first as this allows us to fold the subtraction into the
	 * LDR.
	 */
	bic	x2, x30, 0x7
	ldr	x2, [x2, #-16]				// op

	mov	x2, x11					// op
	ldr	x4, [x2, #FTRACE_OPS_FUNC]		// op->func
	blr	x4					// op->func(ip, parent_ip, op, regs)

@@ -107,8 +123,15 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
	ldp	x6, x7, [sp, #FREGS_X6]
	ldr	x8,     [sp, #FREGS_X8]

	/* Restore the callsite's FP, LR, PC */
	/* Restore the callsite's FP */
	ldr	x29, [sp, #FREGS_FP]

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	ldr	x17, [sp, #FREGS_DIRECT_TRAMP]
	cbnz	x17, ftrace_caller_direct_late
#endif

	/* Restore the callsite's LR and PC */
	ldr	x30, [sp, #FREGS_LR]
	ldr	x9,  [sp, #FREGS_PC]

@@ -116,8 +139,45 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
	add	sp, sp, #FREGS_SIZE + 32

	ret	x9

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_INNER_LABEL(ftrace_caller_direct_late, SYM_L_LOCAL)
	/*
	 * Head to a direct trampoline in x17 after having run other tracers.
	 * The ftrace_regs are live, and x0-x8 and FP have been restored. The
	 * LR, PC, and SP have not been restored.
	 */

	/*
	 * Restore the callsite's LR and PC matching the trampoline calling
	 * convention.
	 */
	ldr	x9,  [sp, #FREGS_LR]
	ldr	x30, [sp, #FREGS_PC]

	/* Restore the callsite's SP */
	add	sp, sp, #FREGS_SIZE + 32

SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL)
	/*
	 * Head to a direct trampoline in x17.
	 *
	 * We use `BR X17` as this can safely land on a `BTI C` or `PACIASP` in
	 * the trampoline, and will not unbalance any return stack.
	 */
	br	x17
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
SYM_CODE_END(ftrace_caller)

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
 * Stub direct trampoline: performs no tracing work and simply hands control
 * back, undoing the register shuffle of the direct trampoline calling
 * convention.
 *
 * As set up by ftrace_caller_direct_late, a direct trampoline is entered
 * with the callsite's LR in x9 and the saved PC in x30.
 * NOTE(review): the early ftrace_caller_direct path and direct BL from the
 * patchsite presumably provide the same x9/x30 contents - confirm against
 * the patchsite sequence.
 */
SYM_CODE_START(ftrace_stub_direct_tramp)
	bti	c			// valid landing pad for the `BR X17` used to get here
	mov	x10, x30		// x10 = address to resume at (value passed in x30)
	mov	x30, x9			// put the callsite's LR back into x30
	ret	x10			// branch to the resume address held in x10
SYM_CODE_END(ftrace_stub_direct_tramp)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */

/*
+31 −5
Original line number Diff line number Diff line
@@ -206,6 +206,13 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
	return NULL;
}

/*
 * Report whether 'addr' is within range of a 'BL' instruction placed at 'pc'.
 *
 * The signed displacement from 'pc' to 'addr' must lie in the half-open
 * interval [-SZ_128M, SZ_128M).
 */
static bool reachable_by_bl(unsigned long addr, unsigned long pc)
{
	long delta = (long)addr - (long)pc;

	/* Too far behind the branch instruction. */
	if (delta < -SZ_128M)
		return false;

	/* Otherwise reachable only if not too far ahead. */
	return delta < SZ_128M;
}

/*
 * Find the address the callsite must branch to in order to reach '*addr'.
 *
@@ -220,14 +227,21 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
				      unsigned long *addr)
{
	unsigned long pc = rec->ip;
	long offset = (long)*addr - (long)pc;
	struct plt_entry *plt;

	/*
	 * If a custom trampoline is unreachable, rely on the ftrace_caller
	 * trampoline which knows how to indirectly reach that trampoline
	 * through ops->direct_call.
	 */
	if (*addr != FTRACE_ADDR && !reachable_by_bl(*addr, pc))
		*addr = FTRACE_ADDR;

	/*
	 * When the target is within range of the 'BL' instruction, use 'addr'
	 * as-is and branch to that directly.
	 */
	if (offset >= -SZ_128M && offset < SZ_128M)
	if (reachable_by_bl(*addr, pc))
		return true;

	/*
@@ -330,12 +344,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	if (WARN_ON_ONCE(old_addr != (unsigned long)ftrace_caller))
	unsigned long pc = rec->ip;
	u32 old, new;
	int ret;

	ret = ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
	if (ret)
		return ret;

	if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
		return -EINVAL;
	if (WARN_ON_ONCE(addr != (unsigned long)ftrace_caller))
	if (!ftrace_find_callable_addr(rec, NULL, &addr))
		return -EINVAL;

	return ftrace_rec_update_ops(rec);
	old = aarch64_insn_gen_branch_imm(pc, old_addr,
					  AARCH64_INSN_BRANCH_LINK);
	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);

	return ftrace_modify_code(pc, old, new, true);
}
#endif