Commit 084cc29f authored by Ben Gardon, committed by Paolo Bonzini
Browse files

KVM: x86/MMU: Allow NX huge pages to be disabled on a per-vm basis



In some cases, the NX hugepage mitigation for iTLB multihit is not
needed for all guests on a host. Allow disabling the mitigation on a
per-VM basis to avoid the performance hit of NX hugepages on trusted
workloads.

In order to disable NX hugepages on a VM, ensure that the userspace
actor has permission to reboot the system. Since disabling NX hugepages
would allow a guest to crash the system, it is similar to reboot
permissions.

Ideally, KVM would require userspace to prove it has access to KVM's
nx_huge_pages module param, e.g. so that userspace can opt out without
needing full reboot permissions.  But getting access to the module param
file info is difficult because it is buried in layers of sysfs and module
glue. Requiring CAP_SYS_BOOT is sufficient for all known use cases.

Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20220613212523.3436117-9-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 1c4dc573
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -8206,6 +8206,22 @@ PV guests. The `KVM_PV_DUMP` command is available for the
dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
available and supports the `KVM_PV_DUMP_CPU` subcommand.

8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
-------------------------------------

:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
:Architectures: x86
:Type: vm
:Parameters: args[0] must be 0.
:Returns: 0 on success, -EPERM if the userspace process does not
	 have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
	 created.

This capability disables the NX huge pages mitigation for iTLB MULTIHIT.

The capability has no effect if the nx_huge_pages module parameter is not set.

This capability may only be set before any vCPUs are created.

9. Known KVM API problems
=========================
+2 −0
Original line number Diff line number Diff line
@@ -1336,6 +1336,8 @@ struct kvm_arch {
	 * the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
	 */
	u32 max_vcpu_ids;

	bool disable_nx_huge_pages;
};

struct kvm_vm_stat {
+4 −3
Original line number Diff line number Diff line
@@ -155,9 +155,9 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);

extern int nx_huge_pages;
static inline bool is_nx_huge_page_enabled(void)
static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
{
	return READ_ONCE(nx_huge_pages);
	return READ_ONCE(nx_huge_pages) && !kvm->arch.disable_nx_huge_pages;
}

struct kvm_page_fault {
@@ -256,7 +256,8 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
		.user = err & PFERR_USER_MASK,
		.prefetch = prefetch,
		.is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
		.nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(),
		.nx_huge_page_workaround_enabled =
			is_nx_huge_page_enabled(vcpu->kvm),

		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
		.req_level = PG_LEVEL_4K,
+4 −3
Original line number Diff line number Diff line
@@ -147,7 +147,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
		spte |= spte_shadow_accessed_mask(spte);

	if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
	    is_nx_huge_page_enabled()) {
	    is_nx_huge_page_enabled(vcpu->kvm)) {
		pte_access &= ~ACC_EXEC_MASK;
	}

@@ -246,7 +246,8 @@ static u64 make_spte_executable(u64 spte)
 * This is used during huge page splitting to build the SPTEs that make up the
 * new page table.
 */
u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index)
u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, int huge_level,
			      int index)
{
	u64 child_spte;
	int child_level;
@@ -274,7 +275,7 @@ u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index)
		 * When splitting to a 4K page, mark the page executable as the
		 * NX hugepage mitigation no longer applies.
		 */
		if (is_nx_huge_page_enabled())
		if (is_nx_huge_page_enabled(kvm))
			child_spte = make_spte_executable(child_spte);
	}

+2 −1
Original line number Diff line number Diff line
@@ -421,7 +421,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
	       u64 old_spte, bool prefetch, bool can_unsync,
	       bool host_writable, u64 *new_spte);
u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index);
u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, int huge_level,
			      int index);
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled);
u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access);
u64 mark_spte_for_access_track(u64 spte);
Loading