Commit 3e763ec7 authored by Linus Torvalds
Pull KVM fixes from Paolo Bonzini:
 "ARM:

   - Plug race between enabling MTE and creating vcpus

   - Fix off-by-one bug when checking whether an address range is RAM

  x86:

   - Fixes for the new MMU, especially a memory leak on hosts with <39
     physical address bits

   - Remove bogus EFER.NX checks on 32-bit non-PAE hosts

   - WAITPKG fix"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: Protect marking SPs unsync when using TDP MMU with spinlock
  KVM: x86/mmu: Don't step down in the TDP iterator when zapping all SPTEs
  KVM: x86/mmu: Don't leak non-leaf SPTEs when zapping all SPTEs
  KVM: nVMX: Use vmx_need_pf_intercept() when deciding if L0 wants a #PF
  kvm: vmx: Sync all matching EPTPs when injecting nested EPT fault
  KVM: x86: remove dead initialization
  KVM: x86: Allow guest to set EFER.NX=1 on non-PAE 32-bit kernels
  KVM: VMX: Use current VMCS to query WAITPKG support for MSR emulation
  KVM: arm64: Fix race when enabling KVM_ARM_CAP_MTE
  KVM: arm64: Fix off-by-one in range_is_memory
parents 0aa78d17 6e949ddb
Documentation/virt/kvm/locking.rst  +4 −4

@@ -25,10 +25,10 @@ On x86:

 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock

-- kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock is
-  taken inside kvm->arch.mmu_lock, and cannot be taken without already
-  holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
-  there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+- kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock and
+  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
+  cannot be taken without already holding kvm->arch.mmu_lock (typically with
+  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).

 Everything else is a leaf: no other lock is taken inside the critical
 sections.
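As an aside, the nesting described above can be pictured as follows; this is an illustrative sketch only, not code from this merge:

/*
 * Illustrative only: the inner spinlocks are only ever taken with
 * mmu_lock already held, typically for read under the TDP MMU.
 */
static void nesting_example(struct kvm *kvm)
{
        read_lock(&kvm->mmu_lock);

        spin_lock(&kvm->arch.tdp_mmu_pages_lock);
        /* ... update the TDP MMU page lists ... */
        spin_unlock(&kvm->arch.tdp_mmu_pages_lock);

        spin_lock(&kvm->arch.mmu_unsync_pages_lock);
        /* ... mark shadow pages unsync ... */
        spin_unlock(&kvm->arch.mmu_unsync_pages_lock);

        read_unlock(&kvm->mmu_lock);
}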
arch/arm64/kvm/arm.c  +8 −4

@@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                kvm->arch.return_nisv_io_abort_to_user = true;
                break;
        case KVM_CAP_ARM_MTE:
-               if (!system_supports_mte() || kvm->created_vcpus)
-                       return -EINVAL;
-               r = 0;
-               kvm->arch.mte_enabled = true;
+               mutex_lock(&kvm->lock);
+               if (!system_supports_mte() || kvm->created_vcpus) {
+                       r = -EINVAL;
+               } else {
+                       r = 0;
+                       kvm->arch.mte_enabled = true;
+               }
+               mutex_unlock(&kvm->lock);
                break;
        default:
                r = -EINVAL;
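The other half of the race is vCPU creation, which bumps created_vcpus under the same mutex; condensed from kvm_vm_ioctl_create_vcpu() in virt/kvm/kvm_main.c for illustration:

        /*
         * With both paths serialized on kvm->lock, the MTE capability
         * check above can no longer run concurrently with this increment.
         */
        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus == KVM_MAX_VCPUS) {
                mutex_unlock(&kvm->lock);
                return -EINVAL;
        }
        kvm->created_vcpus++;
        mutex_unlock(&kvm->lock);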
arch/arm64/kvm/hyp/nvhe/mem_protect.c  +1 −1

@@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end)
 {
        struct kvm_mem_range r1, r2;

-       if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2))
+       if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2))
                return false;
        if (r1.start != r2.start)
                return false;
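The range is half-open: [start, end) covers bytes start through end - 1, so probing end itself lands one past the range and can reject a span that is entirely memory. A standalone illustration with hypothetical addresses, not the kernel's region table:

#include <stdbool.h>
#include <stdint.h>

struct mem_region { uint64_t start, end; };     /* end is exclusive */

static bool contains(const struct mem_region *r, uint64_t addr)
{
        return addr >= r->start && addr < r->end;
}

int main(void)
{
        struct mem_region ram = { 0x40000000, 0x80000000 };
        uint64_t start = 0x40000000, end = 0x80000000;

        /* Old check: probes end itself, which is outside the region, so
         * a range exactly filling RAM was wrongly reported as not memory. */
        bool old = contains(&ram, start) && contains(&ram, end);

        /* Fixed check: probes the last byte actually in the range. */
        bool fixed = contains(&ram, start) && contains(&ram, end - 1);

        return (!old && fixed) ? 0 : 1;
}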
arch/x86/include/asm/kvm_host.h  +7 −0

@@ -1038,6 +1038,13 @@ struct kvm_arch {
        struct list_head lpage_disallowed_mmu_pages;
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
+       /*
+        * Protects marking pages unsync during page faults, as TDP MMU page
+        * faults only take mmu_lock for read.  For simplicity, the unsync
+        * pages lock is always taken when marking pages unsync regardless of
+        * whether mmu_lock is held for read or write.
+        */
+       spinlock_t mmu_unsync_pages_lock;

        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
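Why a dedicated spinlock: with the TDP MMU, two vCPUs can fault on the same gfn while each holds mmu_lock only for read, so the write that marks a shadow page unsync needs its own serialization. A minimal sketch of the pattern; the real user is mmu_try_to_unsync_pages() in arch/x86/kvm/mmu/mmu.c:

static void mark_unsync_example(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        /* Caller holds mmu_lock (read or write); this lock is a leaf. */
        spin_lock(&kvm->arch.mmu_unsync_pages_lock);
        sp->unsync = 1;
        spin_unlock(&kvm->arch.mmu_unsync_pages_lock);
}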
arch/x86/kvm/cpuid.c  +1 −27

@@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        kvm_mmu_after_set_cpuid(vcpu);
 }

-static int is_efer_nx(void)
-{
-       return host_efer & EFER_NX;
-}
-
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
-{
-       int i;
-       struct kvm_cpuid_entry2 *e, *entry;
-
-       entry = NULL;
-       for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
-               e = &vcpu->arch.cpuid_entries[i];
-               if (e->function == 0x80000001) {
-                       entry = e;
-                       break;
-               }
-       }
-       if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
-               cpuid_entry_clear(entry, X86_FEATURE_NX);
-               printk(KERN_INFO "kvm: guest NX capability removed\n");
-       }
-}
-
 int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        vcpu->arch.cpuid_entries = e2;
        vcpu->arch.cpuid_nent = cpuid->nent;

-       cpuid_fix_nx_cap(vcpu);
        kvm_update_cpuid_runtime(vcpu);
        kvm_vcpu_after_set_cpuid(vcpu);

@@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)

 void kvm_set_cpu_caps(void)
 {
-       unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
 #ifdef CONFIG_X86_64
        unsigned int f_gbpages = F(GBPAGES);
        unsigned int f_lm = F(LM);
@@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void)
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
-               f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+               F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
                F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
        );
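With cpuid_fix_nx_cap() gone, KVM always advertises NX (CPUID.80000001H:EDX, bit 20) in its supported-CPUID report, even when a 32-bit non-PAE host runs with EFER.NX clear, and the guest may set EFER.NX=1 regardless. A userspace sketch to observe the reported bit (illustrative; error handling elided):

#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        struct kvm_cpuid2 *cpuid =
                calloc(1, sizeof(*cpuid) + 64 * sizeof(struct kvm_cpuid_entry2));
        int kvm = open("/dev/kvm", O_RDWR);

        cpuid->nent = 64;
        ioctl(kvm, KVM_GET_SUPPORTED_CPUID, cpuid);

        for (unsigned int i = 0; i < cpuid->nent; i++)
                if (cpuid->entries[i].function == 0x80000001)
                        return !(cpuid->entries[i].edx & (1u << 20)); /* 0 if NX reported */
        return 1;
}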