Commit 23c1ad53 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

x86/alternatives: Optimize optimize_nops()



Currently, optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:	\oldinstr
  142:	.skip (len-(142b-141b)), 0x90

That is, when 'oldinstr' is short, the tail is padded with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any trailing string of NOPs inside
the alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that 'padlen' becomes superfluous, so remove it.

 [ bp:
   - Adjust commit message
   - remove a stale comment about needing to pad
   - add a comment in optimize_nops()
   - exit early if the NOP verif. loop catches a mismatch - function
     should not not add NOPs in that case
   - fix the "optimized NOPs" offsets output ]

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20210326151259.442992235@infradead.org
parent b1f480bc
Loading
Loading
Loading
Loading
+5 −12
Original line number Diff line number Diff line
@@ -65,7 +65,6 @@ struct alt_instr {
	u16 cpuid;		/* cpuid bit set for replacement */
	u8  instrlen;		/* length of original instruction */
	u8  replacementlen;	/* length of new instruction */
	u8  padlen;		/* length of build-time padding */
} __packed;

/*
@@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, void *end)

#define alt_end_marker		"663"
#define alt_slen		"662b-661b"
#define alt_pad_len		alt_end_marker"b-662b"
#define alt_total_slen		alt_end_marker"b-661b"
#define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"

@@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
	" .word " __stringify(feature) "\n"		/* feature bit     */ \
	" .byte " alt_total_slen "\n"			/* source len      */ \
	" .byte " alt_rlen(num) "\n"			/* replacement len */ \
	" .byte " alt_pad_len "\n"			/* pad len */
	" .byte " alt_rlen(num) "\n"			/* replacement len */

#define ALTINSTR_REPLACEMENT(newinstr, num)		/* replacement */	\
	"# ALT: replacement " #num "\n"						\
@@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
 * Peculiarities:
 * No memory clobber here.
 * Argument numbers start with 1.
 * Best is to use constraints that are fixed size (like (%1) ... "r")
 * If you use variable sized constraints like "m" or "g" in the
 * replacement make sure to pad to the worst case length.
 * Leaving an unused argument 0 to keep API compatibility.
 */
#define alternative_input(oldinstr, newinstr, feature, input...)	\
@@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
 * enough information for the alternatives patching code to patch an
 * instruction. See apply_alternatives().
 */
.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.macro altinstruction_entry orig alt feature orig_len alt_len
	.long \orig - .
	.long \alt - .
	.word \feature
	.byte \orig_len
	.byte \alt_len
	.byte \pad_len
.endm

/*
@@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
142:

	.pushsection .altinstructions,"a"
	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
	altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
	.popsection

	.pushsection .altinstr_replacement,"ax"
@@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
142:

	.pushsection .altinstructions,"a"
	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
	.popsection

	.pushsection .altinstr_replacement,"ax"
+31 −18
Original line number Diff line number Diff line
@@ -189,19 +189,35 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
	unsigned long flags;
	int i;
	struct insn insn;
	int nop, i = 0;

	/*
	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
	 * NOPs, optimize them.
	 */
	for (;;) {
		if (insn_decode_kernel(&insn, &instr[i]))
			return;

		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
			break;

		if ((i += insn.length) >= a->instrlen)
			return;
	}

	for (i = 0; i < a->padlen; i++) {
		if (instr[i] != 0x90)
	for (nop = i; i < a->instrlen; i++) {
		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
			return;
	}

	local_irq_save(flags);
	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
	add_nops(instr + nop, i - nop);
	local_irq_restore(flags);

	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
		   instr, a->instrlen - a->padlen, a->padlen);
		   instr, nop, a->instrlen);
}

/*
@@ -247,19 +263,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
		 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
		 *   patch if feature is *NOT* present.
		 */
		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
			if (a->padlen > 1)
				optimize_nops(a, instr);

			continue;
		}
		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
			goto next;

		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
			(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
			feature >> 5,
			feature & 0x1f,
			instr, instr, a->instrlen,
			replacement, a->replacementlen, a->padlen);
			replacement, a->replacementlen);

		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -283,14 +295,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
		if (a->replacementlen && is_jmp(replacement[0]))
			recompute_jump(a, instr, replacement, insn_buff);

		if (a->instrlen > a->replacementlen) {
			add_nops(insn_buff + a->replacementlen,
				 a->instrlen - a->replacementlen);
			insn_buff_sz += a->instrlen - a->replacementlen;
		}
		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
			insn_buff[insn_buff_sz] = 0x90;

		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);

		text_poke_early(instr, insn_buff, insn_buff_sz);

next:
		optimize_nops(a, instr);
	}
}

+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
#define JUMP_ORIG_OFFSET	0
#define JUMP_NEW_OFFSET		4

#define ALT_ENTRY_SIZE		13
#define ALT_ENTRY_SIZE		12
#define ALT_ORIG_OFFSET		0
#define ALT_NEW_OFFSET		4
#define ALT_FEATURE_OFFSET	8