Commit cc744042 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files
KVM/arm64 updates for 6.5

 - Eager page splitting optimization for dirty logging, optionally
   allowing for a VM to avoid the cost of block splitting in the stage-2
   fault path.

 - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with
   services that live in the Secure world. pKVM intervenes on FF-A calls
   to guarantee the host doesn't misuse memory donated to the hyp or a
   pKVM guest.

 - Support for running the split hypervisor with VHE enabled, known as
   'hVHE' mode. This is extremely useful for testing the split
   hypervisor on VHE-only systems, and paves the way for new use cases
   that depend on having two TTBRs available at EL2.

 - Generalized framework for configurable ID registers from userspace.
   KVM/arm64 currently prevents arbitrary CPU feature set configuration
   from userspace, but the intent is to relax this limitation and allow
   userspace to select a feature set consistent with the CPU.

 - Enable the use of Branch Target Identification (FEAT_BTI) in the
   hypervisor.

 - Use a separate set of pointer authentication keys for the hypervisor
   when running in protected mode, as the host is untrusted at runtime.

 - Ensure timer IRQs are consistently released in the init failure
   paths.

 - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps
   (FEAT_EVT), as it is a register commonly read from userspace.

 - Erratum workaround for the upcoming AmpereOne part, which has broken
   hardware A/D state management.

As a consequence of the hVHE series reworking the arm64 software
features framework, the for-next/module-alloc branch from the arm64 tree
comes along for the ride.
parents b5396271 192df2aa
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
  0000000000000000	0000ffffffffffff	 256TB		user
  ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
 [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
  ffff800000000000	ffff800007ffffff	 128MB		modules
  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
  ffff800000000000	ffff80007fffffff	   2GB		modules
  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
  fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
  fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
  fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
  0000000000000000	000fffffffffffff	   4PB		user
  fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
 [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
  ffff800000000000	ffff800007ffffff	 128MB		modules
  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
  ffff800000000000	ffff80007fffffff	   2GB		modules
  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
  fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
  fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
  fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
+3 −0
Original line number Diff line number Diff line
@@ -52,6 +52,9 @@ stable kernels.
| Allwinner      | A64/R18         | UNKNOWN1        | SUN50I_ERRATUM_UNKNOWN1     |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Ampere         | AmpereOne       | AC03_CPU_38     | AMPERE_ERRATUM_AC03_CPU_38  |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A510     | #2457168        | ARM64_ERRATUM_2457168       |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | Cortex-A510     | #2064142        | ARM64_ERRATUM_2064142       |
+27 −0
Original line number Diff line number Diff line
@@ -8445,6 +8445,33 @@ structure.
When getting the Modified Change Topology Report value, the attr->addr
must point to a byte where the value will be stored or retrieved from.

8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
---------------------------------------

:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
:Architectures: arm64
:Type: vm
:Parameters: arg[0] is the new split chunk size.
:Returns: 0 on success, -EINVAL if any memslot was already created.

This capability sets the chunk size used in Eager Page Splitting.

Eager Page Splitting improves the performance of dirty-logging (used
in live migrations) when guest memory is backed by huge-pages.  It
avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
it eagerly when enabling dirty logging (with the
KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
KVM_CLEAR_DIRTY_LOG.

The chunk size specifies how many pages to break at a time, using a
single allocation for each chunk. Bigger the chunk size, more pages
need to be allocated ahead of time.

The chunk size needs to be a valid block size. The list of acceptable
block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
64-bit bitmap (each bit describing a block size). The default value is
0, to disable the eager page splitting.

9. Known KVM API problems
=========================

+22 −25
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ config ARM64
	select HAVE_IOREMAP_PROT
	select HAVE_IRQ_TIME_ACCOUNTING
	select HAVE_KVM
	select HAVE_MOD_ARCH_SPECIFIC
	select HAVE_NMI
	select HAVE_PERF_EVENTS
	select HAVE_PERF_REGS
@@ -406,6 +407,25 @@ menu "Kernel Features"

menu "ARM errata workarounds via the alternatives framework"

config AMPERE_ERRATUM_AC03_CPU_38
        bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
	default y
	help
	  This option adds an alternative code sequence to work around Ampere
	  erratum AC03_CPU_38 on AmpereOne.

	  The affected design reports FEAT_HAFDBS as not implemented in
	  ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
	  as required by the architecture. The unadvertised HAFDBS
	  implementation suffers from an additional erratum where hardware
	  A/D updates can occur after a PTE has been marked invalid.

	  The workaround forces KVM to explicitly set VTCR_EL2.HA to 0,
	  which avoids enabling unadvertised hardware Access Flag management
	  at stage-2.

	  If unsure, say Y.

config ARM64_WORKAROUND_CLEAN_CACHE
	bool

@@ -577,7 +597,6 @@ config ARM64_ERRATUM_845719
config ARM64_ERRATUM_843419
	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
	default y
	select ARM64_MODULE_PLTS if MODULES
	help
	  This option links the kernel with '--fix-cortex-a53-843419' and
	  enables PLT support to replace certain ADRP instructions, which can
@@ -2107,26 +2126,6 @@ config ARM64_SME
	  register state capable of holding two dimensional matrix tiles to
	  enable various matrix operations.

config ARM64_MODULE_PLTS
	bool "Use PLTs to allow module memory to spill over into vmalloc area"
	depends on MODULES
	select HAVE_MOD_ARCH_SPECIFIC
	help
	  Allocate PLTs when loading modules so that jumps and calls whose
	  targets are too far away for their relative offsets to be encoded
	  in the instructions themselves can be bounced via veneers in the
	  module's PLT. This allows modules to be allocated in the generic
	  vmalloc area after the dedicated module memory area has been
	  exhausted.

	  When running with address space randomization (KASLR), the module
	  region itself may be too far away for ordinary relative jumps and
	  calls, and so in that case, module PLTs are required and cannot be
	  disabled.

	  Specific errata workaround(s) might also force module PLTs to be
	  enabled (ARM64_ERRATUM_843419).

config ARM64_PSEUDO_NMI
	bool "Support for NMI-like interrupts"
	select ARM_GIC_V3
@@ -2167,7 +2166,6 @@ config RELOCATABLE

config RANDOMIZE_BASE
	bool "Randomize the address of the kernel image"
	select ARM64_MODULE_PLTS if MODULES
	select RELOCATABLE
	help
	  Randomizes the virtual address at which the kernel image is
@@ -2198,9 +2196,8 @@ config RANDOMIZE_MODULE_REGION_FULL
	  When this option is not set, the module region will be randomized over
	  a limited range that contains the [_stext, _etext] interval of the
	  core kernel, so branch relocations are almost always in range unless
	  ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
	  particular case of region exhaustion, modules might be able to fall
	  back to a larger 2GB area.
	  the region is exhausted. In this particular case of region
	  exhaustion, modules might be able to fall back to a larger 2GB area.

config CC_HAVE_STACKPROTECTOR_SYSREG
	def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
+6 −0
Original line number Diff line number Diff line
@@ -15,6 +15,9 @@
#define MAX_CPU_FEATURES	128
#define cpu_feature(x)		KERNEL_HWCAP_ ## x

#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR	0
#define ARM64_SW_FEATURE_OVERRIDE_HVHE		4

#ifndef __ASSEMBLY__

#include <linux/bug.h>
@@ -915,6 +918,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
	return 8;
}

s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);

extern struct arm64_ftr_override id_aa64mmfr1_override;
@@ -925,6 +929,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
extern struct arm64_ftr_override id_aa64isar1_override;
extern struct arm64_ftr_override id_aa64isar2_override;

extern struct arm64_ftr_override arm64_sw_feature_override;

u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);

Loading