Commit bce58da1 authored by Linus Torvalds
Pull kvm fixes from Paolo Bonzini:
 "x86:

   - Account for family 17h event renumberings in AMD PMU emulation

   - Remove CPUID leaf 0xA on AMD processors

   - Fix lockdep issue with locking all vCPUs

   - Fix loss of A/D bits in SPTEs

   - Fix syzkaller issue with invalid guest state"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Exit to userspace if vCPU has injected exception and invalid state
  KVM: SEV: Mark nested locking of vcpu->lock
  kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural PMU
  KVM: x86/svm: Account for family 17h event renumberings in amd_pmc_perf_hw_id
  KVM: x86/mmu: Use atomic XCHG to write TDP MMU SPTEs with volatile bits
  KVM: x86/mmu: Move shadow-present check out of spte_has_volatile_bits()
  KVM: x86/mmu: Don't treat fully writable SPTEs as volatile (modulo A/D)
parents 497fe3bb 053d2290
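
Background for the A/D-bits fixes below: if KVM snapshots an SPTE, the CPU then sets the Accessed or Dirty bit in hardware, and KVM overwrites the SPTE with a plain store, the hardware-set bit is silently lost; an atomic exchange instead hands back the value that was actually resident. A minimal userspace analogue using C11 atomics (the bit layout and mask are stand-ins, not KVM's SPTE format):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define DIRTY_BIT	(1ull << 9)	/* stand-in for shadow_dirty_mask */

int main(void)
{
	_Atomic uint64_t spte;
	uint64_t old_spte;

	/* Plain store: KVM snapshots the SPTE, "hardware" dirties the page,
	 * then the store clobbers the Dirty bit and the snapshot is stale. */
	atomic_store(&spte, 0x1);
	old_spte = atomic_load(&spte);
	atomic_fetch_or(&spte, DIRTY_BIT);	/* CPU sets Dirty */
	atomic_store(&spte, 0);
	printf("plain store saw Dirty: %d\n", !!(old_spte & DIRTY_BIT)); /* 0: lost */

	/* XCHG: the write returns the live value, so the concurrently set
	 * Dirty bit is observed and can be propagated before zapping. */
	atomic_store(&spte, 0x1);
	atomic_fetch_or(&spte, DIRTY_BIT);	/* CPU sets Dirty */
	old_spte = atomic_exchange(&spte, 0);
	printf("xchg saw Dirty:        %d\n", !!(old_spte & DIRTY_BIT)); /* 1: kept */
	return 0;
}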
arch/x86/kvm/cpuid.c  +5 −0
@@ -887,6 +887,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		union cpuid10_eax eax;
 		union cpuid10_edx edx;
 
+		if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
+			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+			break;
+		}
+
 		perf_get_x86_pmu_capability(&cap);
 
 		/*
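
For reference, the guest-visible effect of the hunk above: on hosts without X86_FEATURE_ARCH_PERFMON (i.e. AMD), CPUID leaf 0xA now reads as all zeroes rather than reflecting perf's view of the host PMU. A quick check from inside a guest (hypothetical test program, not part of the patch):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	/* Leaf 0xA: architectural PMU. eax bits 7:0 (version ID) == 0
	 * means no architectural PMU is advertised. */
	__get_cpuid(0xa, &eax, &ebx, &ecx, &edx);
	printf("CPUID.0xA: eax=%#x ebx=%#x ecx=%#x edx=%#x\n",
	       eax, ebx, ecx, edx);
	return 0;
}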
arch/x86/kvm/mmu/mmu.c  +5 −29
@@ -473,30 +473,6 @@ static u64 __get_spte_lockless(u64 *sptep)
 }
 #endif
 
-static bool spte_has_volatile_bits(u64 spte)
-{
-	if (!is_shadow_present_pte(spte))
-		return false;
-
-	/*
-	 * Always atomically update spte if it can be updated
-	 * out of mmu-lock, it can ensure dirty bit is not lost,
-	 * also, it can help us to get a stable is_writable_pte()
-	 * to ensure tlb flush is not missed.
-	 */
-	if (spte_can_locklessly_be_made_writable(spte) ||
-	    is_access_track_spte(spte))
-		return true;
-
-	if (spte_ad_enabled(spte)) {
-		if ((spte & shadow_accessed_mask) == 0 ||
-		    (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
-			return true;
-	}
-
-	return false;
-}
-
 /* Rules for using mmu_spte_set:
  * Set the sptep from nonpresent to present.
  * Note: the sptep being assigned *must* be either not present
@@ -557,7 +533,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 	 * we always atomically update it, see the comments in
 	 * spte_has_volatile_bits().
 	 */
-	if (spte_can_locklessly_be_made_writable(old_spte) &&
+	if (is_mmu_writable_spte(old_spte) &&
 	      !is_writable_pte(new_spte))
 		flush = true;

@@ -591,7 +567,8 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
 	u64 old_spte = *sptep;
 	int level = sptep_to_sp(sptep)->role.level;
 
-	if (!spte_has_volatile_bits(old_spte))
+	if (!is_shadow_present_pte(old_spte) ||
+	    !spte_has_volatile_bits(old_spte))
 		__update_clear_spte_fast(sptep, 0ull);
 	else
 		old_spte = __update_clear_spte_slow(sptep, 0ull);
@@ -1187,7 +1164,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
 	u64 spte = *sptep;
 
 	if (!is_writable_pte(spte) &&
-	      !(pt_protect && spte_can_locklessly_be_made_writable(spte)))
+	    !(pt_protect && is_mmu_writable_spte(spte)))
 		return false;
 
 	rmap_printk("spte %p %llx\n", sptep, *sptep);
@@ -3196,8 +3173,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * be removed in the fast path only if the SPTE was
 		 * write-protected for dirty-logging or access tracking.
 		 */
-		if (fault->write &&
-		    spte_can_locklessly_be_made_writable(spte)) {
+		if (fault->write && is_mmu_writable_spte(spte)) {
 			new_spte |= PT_WRITABLE_MASK;
 
 			/*
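
The renames above are mechanical: is_mmu_writable_spte() tests the same software bit as the old spte_can_locklessly_be_made_writable(). A stand-in sketch of the two writability notions being distinguished (masks are illustrative, not KVM's real bit positions):

#include <stdbool.h>
#include <stdint.h>

#define PT_WRITABLE_MASK	(1ull << 1)	/* hardware Writable bit */
#define MMU_WRITABLE_MASK	(1ull << 58)	/* stand-in for shadow_mmu_writable_mask */

/* Writable right now: guest writes complete without faulting. */
static bool is_writable_pte(uint64_t spte)
{
	return spte & PT_WRITABLE_MASK;
}

/* Not necessarily writable now, but KVM may set the Writable bit
 * without holding mmu_lock, e.g. in the fast page fault path. */
static bool is_mmu_writable_spte(uint64_t spte)
{
	return spte & MMU_WRITABLE_MASK;
}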
arch/x86/kvm/mmu/spte.c  +28 −0
@@ -90,6 +90,34 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 				     E820_TYPE_RAM);
 }
 
+/*
+ * Returns true if the SPTE has bits that may be set without holding mmu_lock.
+ * The caller is responsible for checking if the SPTE is shadow-present, and
+ * for determining whether or not the caller cares about non-leaf SPTEs.
+ */
+bool spte_has_volatile_bits(u64 spte)
+{
+	/*
+	 * Always atomically update spte if it can be updated
+	 * out of mmu-lock, it can ensure dirty bit is not lost,
+	 * also, it can help us to get a stable is_writable_pte()
+	 * to ensure tlb flush is not missed.
+	 */
+	if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
+		return true;
+
+	if (is_access_track_spte(spte))
+		return true;
+
+	if (spte_ad_enabled(spte)) {
+		if (!(spte & shadow_accessed_mask) ||
+		    (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
+			return true;
+	}
+
+	return false;
+}
+
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	       const struct kvm_memory_slot *slot,
 	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
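
Note the contract change relative to the old mmu.c copy: the is_shadow_present_pte() check moved out of the helper, so callers must now filter non-present SPTEs themselves, as mmu_spte_clear_track_bits() above does. A hypothetical caller under the new contract:

/* Hypothetical wrapper illustrating the new calling convention:
 * spte_has_volatile_bits() may only see shadow-present SPTEs. */
static bool spte_needs_atomic_update(u64 spte)
{
	return is_shadow_present_pte(spte) && spte_has_volatile_bits(spte);
}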
arch/x86/kvm/mmu/spte.h  +3 −1
@@ -390,7 +390,7 @@ static inline void check_spte_writable_invariants(u64 spte)
			  "kvm: Writable SPTE is not MMU-writable: %llx", spte);
}

static inline bool spte_can_locklessly_be_made_writable(u64 spte)
static inline bool is_mmu_writable_spte(u64 spte)
{
	return spte & shadow_mmu_writable_mask;
}
@@ -404,6 +404,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
 	return gen;
 }
 
+bool spte_has_volatile_bits(u64 spte);
+
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	       const struct kvm_memory_slot *slot,
 	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
arch/x86/kvm/mmu/tdp_iter.h  +32 −2
@@ -6,6 +6,7 @@
 #include <linux/kvm_host.h>
 
 #include "mmu.h"
+#include "spte.h"
 
 /*
  * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
@@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
 {
 	return READ_ONCE(*rcu_dereference(sptep));
 }
-static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val)
+
+static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
+{
+	return xchg(rcu_dereference(sptep), new_spte);
+}
+
+static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
+{
+	WRITE_ONCE(*rcu_dereference(sptep), new_spte);
+}
+
+static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
+					 u64 new_spte, int level)
 {
-	WRITE_ONCE(*rcu_dereference(sptep), val);
+	/*
+	 * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
+	 * volatile bits, i.e. has bits that can be set outside of mmu_lock.
+	 * The Writable bit can be set by KVM's fast page fault handler, and
+	 * Accessed and Dirty bits can be set by the CPU.
+	 *
+	 * Note, non-leaf SPTEs do have Accessed bits and those bits are
+	 * technically volatile, but KVM doesn't consume the Accessed bit of
+	 * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
+	 * logic needs to be reassessed if KVM were to use non-leaf Accessed
+	 * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+	 */
+	if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
+	    spte_has_volatile_bits(old_spte))
+		return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
+
+	__kvm_tdp_mmu_write_spte(sptep, new_spte);
+	return old_spte;
 }
 
 /*
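
The intended usage pattern (a sketch of what a tdp_mmu.c caller would look like, not a quote of the actual code) feeds the return value back into the caller's snapshot so that concurrently-set A/D bits survive:

	/* Sketch: the returned value is what was actually in the SPTE, so
	 * Accessed/Dirty bits the CPU set during the write are retained. */
	old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);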