Commit ee3e2469 authored by Peter Zijlstra
Browse files

x86/ftrace: Make it call depth tracking aware



Since ftrace has trampolines, don't use thunks for the __fentry__ site
but instead require that every function called from there includes
accounting. This very much includes all the direct-call functions.

Additionally, ftrace uses ROP tricks in two places:

 - return_to_handler(), and
 - ftrace_regs_caller() when pt_regs->orig_ax is set by a direct-call.

return_to_handler() already uses a retpoline to replace an
indirect-jump to defeat IBT, since this is a jump-type retpoline, make
sure there is no accounting done and ALTERNATIVE the RET into a ret.

ftrace_regs_caller() does much the same and gets the same treatment.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.927545073@infradead.org
parent 36b64f10
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -343,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
{
	x86_return_thunk = &__x86_return_skl;
}

#define CALL_DEPTH_ACCOUNT					\
	ALTERNATIVE("",						\
		    __stringify(INCREMENT_CALL_DEPTH),		\
		    X86_FEATURE_CALL_DEPTH)

#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
@@ -351,6 +357,9 @@ DECLARE_PER_CPU(u64, __x86_ctxsw_count);
#endif
#else
static inline void x86_set_skl_return_thunk(void) {}

#define CALL_DEPTH_ACCOUNT ""

#endif

#ifdef CONFIG_RETPOLINE
+1 −1
Original line number Diff line number Diff line
@@ -316,7 +316,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
		return 0;

	/* Is function call target a thunk? */
	if (is_callthunk(func))
	if (func && is_callthunk(func))
		return 0;

	memcpy(*pprog, tmpl, tmpl_size);
+12 −4
Original line number Diff line number Diff line
@@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void)

static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting itself.
	 */
	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}

@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
	unsigned long size;
	unsigned long *ptr;
	void *trampoline;
	void *ip;
	void *ip, *dest;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
@@ -404,10 +408,14 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
	/* put in the call to the function */
	mutex_lock(&text_mutex);
	call_offset -= start_offset;
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting before the call already.
	 */
	dest = ftrace_ops_get_func(ops);
	memcpy(trampoline + call_offset,
	       text_gen_insn(CALL_INSN_OPCODE,
			     trampoline + call_offset,
			     ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
	       text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
	       CALL_INSN_SIZE);
	mutex_unlock(&text_mutex);

	/* ALLOC_TRAMP flags lets us know we created it */
+20 −2
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 */

#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
@@ -132,6 +133,7 @@
#ifdef CONFIG_DYNAMIC_FTRACE

SYM_FUNC_START(__fentry__)
	CALL_DEPTH_ACCOUNT
	RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
@@ -140,6 +142,8 @@ SYM_FUNC_START(ftrace_caller)
	/* save_mcount_regs fills in first two parameters */
	save_mcount_regs

	CALL_DEPTH_ACCOUNT

	/* Stack - skipping return address of ftrace_caller */
	leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
	movq %rcx, RSP(%rsp)
@@ -155,6 +159,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
	/* Only ops with REGS flag set should have CS register set */
	movq $0, CS(%rsp)

	/* Account for the function call below */
	CALL_DEPTH_ACCOUNT

SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
	ANNOTATE_NOENDBR
	call ftrace_stub
@@ -189,6 +196,8 @@ SYM_FUNC_START(ftrace_regs_caller)
	save_mcount_regs 8
	/* save_mcount_regs fills in first two parameters */

	CALL_DEPTH_ACCOUNT

SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
	ANNOTATE_NOENDBR
	/* Load the ftrace_ops into the 3rd parameter */
@@ -219,6 +228,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
	/* regs go into 4th parameter */
	leaq (%rsp), %rcx

	/* Account for the function call below */
	CALL_DEPTH_ACCOUNT

SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
	ANNOTATE_NOENDBR
	call ftrace_stub
@@ -282,7 +294,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
	int3
.Ldo_rebalance:
	add $8, %rsp
	RET
	ALTERNATIVE __stringify(RET), \
		    __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
		    X86_FEATURE_CALL_DEPTH

SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
@@ -291,6 +305,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */

SYM_FUNC_START(__fentry__)
	CALL_DEPTH_ACCOUNT

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

@@ -347,6 +363,8 @@ SYM_CODE_START(return_to_handler)
	int3
.Ldo_rop:
	mov %rdi, (%rsp)
	RET
	ALTERNATIVE __stringify(RET), \
		    __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
		    X86_FEATURE_CALL_DEPTH
SYM_CODE_END(return_to_handler)
#endif
+6 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/ftrace.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
@@ -2135,6 +2136,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	prog = image;

	EMIT_ENDBR();
	/*
	 * This is the direct-call trampoline, as such it needs accounting
	 * for the __fentry__ call.
	 */
	x86_call_depth_emit_accounting(&prog, NULL);
	EMIT1(0x55);		 /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
Loading