Unverified commit 9daca9a5, authored by Palmer Dabbelt
Browse files

Merge patch series "riscv: improve boot time isa extensions handling"

Jisheng Zhang <jszhang@kernel.org> says:

Generally, riscv ISA extensions are fixed for any specific hardware
platform, so a hart's features won't change after booting. This
characteristic makes it straightforward to use a static branch to check
whether a specific ISA extension is supported, to optimize performance.

However, some ISA extensions such as SVPBMT and ZICBOM are handled
via the alternative sequences.

Basically, for ease of maintenance, we prefer to use static branches
in C code, but recently, Samuel found that the static branch usage in
cpu_relax() breaks building with CONFIG_CC_OPTIMIZE_FOR_SIZE[1]. As
Samuel pointed out, "Having a static branch in cpu_relax() is
problematic because that function is widely inlined, including in some
quite complex functions like in the VDSO. A quick measurement shows
this static branch is responsible by itself for around 40% of the jump
table."

Samuel's findings pointed out one of a few downsides of using static
branches in C code to handle ISA extensions detected at boot time: the
static branch metadata in the __jump_table section is not discarded
after the ISA extensions are finalized, so it wastes some space.

I want to try to solve the issue for all possible dynamic handling of
ISA extensions at boot time. Inspired by Mark[2], this patch introduces
riscv_has_extension_*() helpers, which work like static branches but
are patched using alternatives, thus the metadata can be freed after
patching.

[1]https://lore.kernel.org/linux-riscv/20220922060958.44203-1-samuel@sholland.org/
[2]https://lore.kernel.org/linux-arm-kernel/20220912162210.3626215-8-mark.rutland@arm.com/
[3]https://lore.kernel.org/linux-riscv/20221130225614.1594256-1-heiko@sntech.de/

* b4-shazam-merge:
  riscv: remove riscv_isa_ext_keys[] array and related usage
  riscv: KVM: Switch has_svinval() to riscv_has_extension_unlikely()
  riscv: cpu_relax: switch to riscv_has_extension_likely()
  riscv: alternative: patch alternatives in the vDSO
  riscv: switch to relative alternative entries
  riscv: module: Add ADD16 and SUB16 rela types
  riscv: module: move find_section to module.h
  riscv: fpu: switch has_fpu() to riscv_has_extension_likely()
  riscv: introduce riscv_has_extension_[un]likely()
  riscv: cpufeature: extend riscv_cpufeature_patch_func to all ISA extensions
  riscv: hwcap: make ISA extension ids can be used in asm
  riscv: cpufeature: detect RISCV_ALTERNATIVES_EARLY_BOOT earlier
  riscv: move riscv_noncoherent_supported() out of ZICBOM probe

Link: https://lore.kernel.org/r/20230128172856.3814-1-jszhang@kernel.org


Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parents 75ab93a2 03966594
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -107,7 +107,8 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,

		tmp = (1U << alt->errata_id);
		if (cpu_req_errata & tmp) {
			patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
			patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt),
					  alt->alt_len);
			cpu_apply_errata |= tmp;
		}
	}
+8 −3
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
	struct alt_entry *alt;
	u32 cpu_req_errata = thead_errata_probe(stage, archid, impid);
	u32 tmp;
	void *oldptr, *altptr;

	for (alt = begin; alt < end; alt++) {
		if (alt->vendor_id != THEAD_VENDOR_ID)
@@ -96,12 +97,16 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al

		tmp = (1U << alt->errata_id);
		if (cpu_req_errata & tmp) {
			oldptr = ALT_OLD_PTR(alt);
			altptr = ALT_ALT_PTR(alt);

			/* On vm-alternatives, the mmu isn't running yet */
			if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
				memcpy((void *)__pa_symbol(alt->old_ptr),
				       (void *)__pa_symbol(alt->alt_ptr), alt->alt_len);
				memcpy((void *)__pa_symbol(oldptr),
				       (void *)__pa_symbol(altptr),
				       alt->alt_len);
			else
				patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
				patch_text_nosync(oldptr, altptr, alt->alt_len);
		}
	}

+10 −10
Original line number Diff line number Diff line
@@ -7,11 +7,11 @@
#ifdef __ASSEMBLY__

.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len
	RISCV_PTR \oldptr
	RISCV_PTR \newptr
	REG_ASM \vendor_id
	REG_ASM \new_len
	.word	\errata_id
	.4byte \oldptr - .
	.4byte \newptr - .
	.2byte \vendor_id
	.2byte \new_len
	.4byte \errata_id
.endm

.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg
@@ -59,11 +59,11 @@
#include <linux/stringify.h>

#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen)		\
	RISCV_PTR " " oldptr "\n"					\
	RISCV_PTR " " newptr "\n"					\
	REG_ASM " " vendor_id "\n"					\
	REG_ASM " " newlen "\n"						\
	".word " errata_id "\n"
	".4byte	((" oldptr ") - .) \n"					\
	".4byte	((" newptr ") - .) \n"					\
	".2byte	" vendor_id "\n"					\
	".2byte " newlen "\n"						\
	".4byte	" errata_id "\n"

#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)		\
	".if " __stringify(enable) " == 1\n"				\
+11 −6
Original line number Diff line number Diff line
@@ -23,6 +23,11 @@
#define RISCV_ALTERNATIVES_MODULE	1 /* alternatives applied during module-init */
#define RISCV_ALTERNATIVES_EARLY_BOOT	2 /* alternatives applied before mmu start */

/*
 * Convert a relative field of an alternative entry into an absolute address.
 *
 * __ALT_PTR(a, f): @a points to an entry and @f names one of its offset
 * fields. The field stores an offset measured from the field's own
 * location, so adding that offset to the address of the field itself gives
 * the absolute address. The addition is done on a void pointer, i.e. it
 * relies on byte-granular void * arithmetic (a GNU C extension).
 *
 * ALT_OLD_PTR(a): absolute address resolved from a->old_offset.
 * ALT_ALT_PTR(a): absolute address resolved from a->alt_offset.
 */
#define __ALT_PTR(a, f)			((void *)&(a)->f + (a)->f)
#define ALT_OLD_PTR(a)			__ALT_PTR(a, old_offset)
#define ALT_ALT_PTR(a)			__ALT_PTR(a, alt_offset)

void __init apply_boot_alternatives(void);
void __init apply_early_boot_alternatives(void);
void apply_module_alternatives(void *start, size_t length);
@@ -31,12 +36,12 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
				   int patch_offset);

struct alt_entry {
	void *old_ptr;		 /* address of original instruciton or data  */
	void *alt_ptr;		 /* address of replacement instruction or data */
	unsigned long vendor_id; /* cpu vendor id */
	unsigned long alt_len;   /* The replacement size */
	unsigned int errata_id;  /* The errata id */
} __packed;
	s32 old_offset;		/* offset relative to original instruction or data  */
	s32 alt_offset;		/* offset relative to replacement instruction or data */
	u16 vendor_id;		/* cpu vendor id */
	u16 alt_len;		/* The replacement size */
	u32 errata_id;		/* The errata id */
};

struct errata_checkfunc_id {
	unsigned long vendor_id;
+3 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@

#include <asm/alternative.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/vendorid_list.h>

#ifdef CONFIG_ERRATA_SIFIVE
@@ -56,7 +57,7 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
#define ALT_SVPBMT(_val, prot)						\
asm(ALTERNATIVE_2("li %0, 0\t\nnop",					\
		  "li %0, %1\t\nslli %0,%0,%3", 0,			\
			CPUFEATURE_SVPBMT, CONFIG_RISCV_ISA_SVPBMT,	\
			RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT,	\
		  "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,	\
			ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT)	\
		: "=r"(_val)						\
@@ -130,7 +131,7 @@ asm volatile(ALTERNATIVE_2( \
	"add a0, a0, %0\n\t"						\
	"2:\n\t"							\
	"bltu a0, %2, 3b\n\t"						\
	"nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,		\
	"nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,	\
	"mv a0, %1\n\t"							\
	"j 2f\n\t"							\
	"3:\n\t"							\
Loading