Commit 2acf923e authored by Dexuan Cui's avatar Dexuan Cui Committed by Avi Kivity
Browse files

KVM: VMX: Enable XSAVE/XRSTOR for guest



This patch enable guest to use XSAVE/XRSTOR instructions.

We assume that host_xcr0 would use all possible bits that OS supported.

And we loaded xcr0 in the same way we handled fpu - do it as late as we can.

Signed-off-by: default avatarDexuan Cui <dexuan.cui@intel.com>
Signed-off-by: default avatarSheng Yang <sheng@linux.intel.com>
Reviewed-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent f495c6e5
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
	} update_pte;

	struct fpu guest_fpu;
	u64 xcr0;

	gva_t mmio_fault_cr2;
	struct kvm_pio_request pio;
@@ -605,6 +606,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+1 −0
Original line number Diff line number Diff line
@@ -267,6 +267,7 @@ enum vmcs_field {
#define EXIT_REASON_EPT_VIOLATION       48
#define EXIT_REASON_EPT_MISCONFIG       49
#define EXIT_REASON_WBINVD		54
#define EXIT_REASON_XSETBV		55

/*
 * Interruption-information format
+6 −0
Original line number Diff line number Diff line
@@ -71,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
	return kvm_read_cr4_bits(vcpu, ~0UL);
}

static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
	return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}

#endif
+13 −0
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/xcr.h>

#include "trace.h"

@@ -3390,6 +3392,16 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
	return 1;
}

static int handle_xsetbv(struct kvm_vcpu *vcpu)
{
	u64 new_bv = kvm_read_edx_eax(vcpu);
	u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);

	if (kvm_set_xcr(vcpu, index, new_bv) == 0)
		skip_emulated_instruction(vcpu);
	return 1;
}

static int handle_apic_access(struct kvm_vcpu *vcpu)
{
	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
@@ -3668,6 +3680,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
+123 −7
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
			  | X86_CR4_OSXSAVE \
			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -150,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ NULL }
};

u64 __read_mostly host_xcr0;

static inline u32 bit(int bitno)
{
	return 1 << (bitno & 31);
}

static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
@@ -474,6 +482,61 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	xcr0 = xcr;
	if (kvm_x86_ops->get_cpl(vcpu) != 0)
		return 1;
	if (!(xcr0 & XSTATE_FP))
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return 1;
	if (xcr0 & ~host_xcr0)
		return 1;
	vcpu->arch.xcr0 = xcr0;
	vcpu->guest_xcr0_loaded = 0;
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (__kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}

static void update_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	if (!best)
		return;

	/* Update OSXSAVE bit */
	if (cpu_has_xsave && best->function == 0x1) {
		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
			best->ecx |= bit(X86_FEATURE_OSXSAVE);
	}
}

int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -482,6 +545,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
@@ -498,6 +564,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	if ((cr4 ^ old_cr4) & pdptr_bits)
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		update_cpuid(vcpu);

	return 0;
}

@@ -666,11 +735,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

static inline u32 bit(int bitno)
{
	return 1 << (bitno & 31);
}

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1814,6 +1878,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
	r = 0;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);

out_free:
	vfree(cpuid_entries);
@@ -1837,6 +1902,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);
	return 0;

out:
@@ -1917,7 +1983,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
		0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved, XSAVE, OSXSAVE */;
		0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */;
	/* cpuid 0x80000001.ecx */
	const u32 kvm_supported_word6_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1932,7 +1998,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,

	switch (function) {
	case 0:
		entry->eax = min(entry->eax, (u32)0xb);
		entry->eax = min(entry->eax, (u32)0xd);
		break;
	case 1:
		entry->edx &= kvm_supported_word0_x86_features;
@@ -1990,6 +2056,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
		}
		break;
	}
	case 0xd: {
		int i;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		for (i = 1; *nent < maxnent; ++i) {
			if (entry[i - 1].eax == 0 && i != 2)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case KVM_CPUID_SIGNATURE: {
		char signature[12] = "KVMKVMKVM\0\0";
		u32 *sigptr = (u32 *)signature;
@@ -4125,6 +4205,9 @@ int kvm_arch_init(void *opaque)

	perf_register_guest_info_callbacks(&kvm_guest_cbs);

	if (cpu_has_xsave)
		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	return 0;

out:
@@ -4523,6 +4606,25 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
	}
}

static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
			!vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
		vcpu->guest_xcr0_loaded = 1;
	}
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (vcpu->guest_xcr0_loaded) {
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
		vcpu->guest_xcr0_loaded = 0;
	}
}

static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
	int r;
@@ -4568,6 +4670,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
	kvm_x86_ops->prepare_guest_switch(vcpu);
	if (vcpu->fpu_active)
		kvm_load_guest_fpu(vcpu);
	kvm_load_guest_xcr0(vcpu);

	atomic_set(&vcpu->guest_mode, 1);
	smp_wmb();
@@ -5124,6 +5227,11 @@ int fx_init(struct kvm_vcpu *vcpu)

	fpu_finit(&vcpu->arch.guest_fpu);

	/*
	 * Ensure guest xcr0 is valid for loading
	 */
	vcpu->arch.xcr0 = XSTATE_FP;

	vcpu->arch.cr0 |= X86_CR0_ET;

	return 0;
@@ -5140,6 +5248,12 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
	if (vcpu->guest_fpu_loaded)
		return;

	/*
	 * Restore all possible states in the guest,
	 * and assume host would use all available bits.
	 * Guest xcr0 would be loaded later.
	 */
	kvm_put_guest_xcr0(vcpu);
	vcpu->guest_fpu_loaded = 1;
	unlazy_fpu(current);
	fpu_restore_checking(&vcpu->arch.guest_fpu);
@@ -5148,6 +5262,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)

void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
	kvm_put_guest_xcr0(vcpu);

	if (!vcpu->guest_fpu_loaded)
		return;

Loading