Commit 3b6c1747 authored by Peter Zijlstra's avatar Peter Zijlstra
Browse files

x86/retpoline: Add SKL retthunk retpolines



Ensure that retpolines do the proper call accounting so that the return
accounting works correctly.

Specifically; retpolines are used to replace both 'jmp *%reg' and
'call *%reg', however these two cases do not have the same accounting
requirements. Therefore split things up and provide two different
retpoline arrays for SKL.

The 'jmp *%reg' case needs no accounting, the
__x86_indirect_jump_thunk_array[] covers this. The retpoline is
changed to not use the return thunk; it's a simple call;ret construct.

[ strictly speaking it should do:
	andq $(~0x1f), PER_CPU_VAR(__x86_call_depth)
  but we can argue this can be covered by the fuzz we already have
  in the accounting depth (12) vs the RSB depth (16) ]

The 'call *%reg' case does need accounting, the
__x86_indirect_call_thunk_array[] covers this. Again, this retpoline
avoids the use of the return-thunk, in this case to avoid double
accounting.

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.996634749@infradead.org
parent 5d821386
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -301,6 +301,8 @@

typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];

extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void);
@@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {}
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#define GEN(reg)						\
	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_X86_64

/*
+56 −3
Original line number Diff line number Diff line
@@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
	return i;
}

static inline bool is_jcc32(struct insn *insn)
{
	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
	return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
}

static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
	u8 op = insn->opcode.bytes[0];
	int i = 0;

	/*
	 * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
	 * tail-calls. Deal with them.
	 */
	if (is_jcc32(insn)) {
		bytes[i++] = op;
		op = insn->opcode.bytes[1];
		goto clang_jcc;
	}

	if (insn->length == 6)
		bytes[i++] = 0x2e; /* CS-prefix */

	switch (op) {
	case CALL_INSN_OPCODE:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_call_thunk_array[reg],
				CALL_INSN_SIZE);
		i += CALL_INSN_SIZE;
		break;

	case JMP32_INSN_OPCODE:
clang_jcc:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_jump_thunk_array[reg],
				JMP32_INSN_SIZE);
		i += JMP32_INSN_SIZE;
		break;

	default:
		WARN("%pS %px %*ph\n", addr, addr, 6, addr);
		return -1;
	}

	WARN_ON_ONCE(i != insn->length);

	return i;
}

/*
 * Rewrite the compiler generated retpoline thunk calls.
 *
@@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
	BUG_ON(reg == 4);

	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
			return emit_call_track_retpoline(addr, insn, reg, bytes);

		return -1;
	}

	op = insn->opcode.bytes[0];

@@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
	 *   [ NOP ]
	 * 1:
	 */
	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
	if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
	if (is_jcc32(insn)) {
		cc = insn->opcode.bytes[1] & 0xf;
		cc ^= 1; /* invert condition */

+63 −8
Original line number Diff line number Diff line
@@ -14,17 +14,18 @@

	.section .text.__x86.indirect_thunk

.macro RETPOLINE reg

.macro POLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call    .Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp .Lspec_trap_\@
	int3
.Ldo_rop_\@:
	mov     %\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
.endm

.macro RETPOLINE reg
	POLINE \reg
	RET
.endm

@@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
@@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_CALL_DEPTH_TRACKING
.macro CALL_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY
	ANNOTATE_NOENDBR

	CALL_DEPTH_ACCOUNT
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)

#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

.macro JUMP_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY
	ANNOTATE_NOENDBR
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)

#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
+4 −1
Original line number Diff line number Diff line
@@ -417,6 +417,9 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
		EMIT2(0xFF, 0xE0 + reg);
	} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
		OPTIMIZER_HIDE_VAR(reg);
		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
			emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
		else
			emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
	} else {
		EMIT2(0xFF, 0xE0 + reg);	/* jmp *%\reg */