Commit a1c288f8 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-misc-6.4' of https://github.com/kvm-x86/linux into HEAD

KVM x86 changes for 6.4:

 - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled,
   and by giving the guest control of CR0.WP when EPT is enabled on VMX
   (VMX-only because SVM doesn't support per-bit controls)

 - Add CR0/CR4 helpers to query single bits, and clean up related code
   where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return
   as a bool

 - Move AMD_PSFD to cpufeatures.h and purge KVM's definition

 - Misc cleanups
parents e1a6d5cf cf9f4c0e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -337,6 +337,7 @@
#define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO		(13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC		(13*32+27) /* Collaborative Processor Performance Control */
#define X86_FEATURE_AMD_PSFD            (13*32+28) /* "" Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO		(13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS			(13*32+31) /* Branch Sampling available */

+3 −9
Original line number Diff line number Diff line
@@ -60,12 +60,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
	return ret;
}

/*
 * This one is tied to SSB in the user API, and not
 * visible in /proc/cpuinfo.
 */
#define KVM_X86_FEATURE_AMD_PSFD	(13*32+28) /* Predictive Store Forwarding Disable */

#define F feature_bit

/* Scattered Flag - For features that are scattered by cpufeatures.h. */
@@ -266,7 +260,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
		/* Update OSXSAVE bit */
		if (boot_cpu_has(X86_FEATURE_XSAVE))
			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));

		cpuid_entry_change(best, X86_FEATURE_APIC,
			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
@@ -275,7 +269,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
	best = cpuid_entry2_find(entries, nent, 7, 0);
	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
		cpuid_entry_change(best, X86_FEATURE_OSPKE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));

	best = cpuid_entry2_find(entries, nent, 0xD, 0);
	if (best)
@@ -715,7 +709,7 @@ void kvm_set_cpu_caps(void)
		F(CLZERO) | F(XSAVEERPTR) |
		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
		__feature_bit(KVM_X86_FEATURE_AMD_PSFD)
		F(AMD_PSFD)
	);

	/*
+8 −0
Original line number Diff line number Diff line
@@ -1640,6 +1640,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
			goto exception;
		break;
	case VCPU_SREG_CS:
		/*
		 * KVM uses "none" when loading CS as part of emulating Real
		 * Mode exceptions and IRET (handled above).  In all other
		 * cases, loading CS without a control transfer is a KVM bug.
		 */
		if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
			goto exception;

		if (!(seg_desc.type & 8))
			goto exception;

+17 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@

#include <linux/kvm_host.h>

#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR0_GUEST_BITS	(X86_CR0_TS | X86_CR0_WP)
#define KVM_POSSIBLE_CR4_GUEST_BITS				  \
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
	 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
@@ -157,6 +157,14 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
	return vcpu->arch.cr0 & mask;
}

static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu,
					       unsigned long cr0_bit)
{
	BUILD_BUG_ON(!is_power_of_2(cr0_bit));

	return !!kvm_read_cr0_bits(vcpu, cr0_bit);
}

static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
{
	return kvm_read_cr0_bits(vcpu, ~0UL);
@@ -171,6 +179,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
	return vcpu->arch.cr4 & mask;
}

static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu,
					       unsigned long cr4_bit)
{
	BUILD_BUG_ON(!is_power_of_2(cr4_bit));

	return !!kvm_read_cr4_bits(vcpu, cr4_bit);
}

static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
{
	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
+26 −2
Original line number Diff line number Diff line
@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
				u64 fault_address, char *insn, int insn_len);
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
					struct kvm_mmu *mmu);

int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -132,7 +134,7 @@ static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
{
	BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);

	return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
	return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)
	       ? cr3 & X86_CR3_PCID_MASK
	       : 0;
}
@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
					  vcpu->arch.mmu->root_role.level);
}

static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
						    struct kvm_mmu *mmu)
{
	/*
	 * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
	 * @mmu's snapshot of CR0.WP and thus all related paging metadata may
	 * be stale.  Refresh CR0.WP and the metadata on-demand when checking
	 * for permission faults.  Exempt nested MMUs, i.e. MMUs for shadowing
	 * nEPT and nNPT, as CR0.WP is ignored in both cases.  Note, KVM does
	 * need to refresh nested_mmu, a.k.a. the walker used to translate L2
	 * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
	 */
	if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
		return;

	__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
}

/*
 * Check if a given access (described through the I/D, W/R and U/S bits of a
 * page fault error code pfec) causes a permission fault with the given PTE
@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
	u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
	bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
	int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
	bool fault = (mmu->permissions[index] >> pte_access) & 1;
	u32 errcode = PFERR_PRESENT_MASK;
	bool fault;

	kvm_mmu_refresh_passthrough_bits(vcpu, mmu);

	fault = (mmu->permissions[index] >> pte_access) & 1;

	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
	if (unlikely(mmu->pkru_mask)) {
Loading