Commit bb1fcc70 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini
Browse files

KVM: nVMX: Allow L1 to use 5-level page walks for nested EPT



Add support for 5-level nested EPT, and advertise said support in the
EPT capabilities MSR.  KVM's MMU can already handle 5-level legacy page
tables, there's no reason to force an L1 VMM to use shadow paging if it
wants to employ 5-level page tables.

Signed-off-by: default avatarSean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 8053f924
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -500,6 +500,18 @@ enum vmcs_field {
						 VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK				(7ull << VMX_EPT_MT_EPTE_SHIFT)

static inline u8 vmx_eptp_page_walk_level(u64 eptp)
{
	u64 encoded_level = eptp & VMX_EPTP_PWL_MASK;

	if (encoded_level == VMX_EPTP_PWL_5)
		return 5;

	/* @eptp must be pre-validated by the caller. */
	WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4);
	return 4;
}

/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE		(VMX_EPT_WRITABLE_MASK |       \
						 VMX_EPT_EXECUTABLE_MASK)
+6 −5
Original line number Diff line number Diff line
@@ -5008,14 +5008,14 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
				   bool execonly)
				   bool execonly, u8 level)
{
	union kvm_mmu_role role = {0};

	/* SMM flag is inherited from root_mmu */
	role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;

	role.base.level = PT64_ROOT_4LEVEL;
	role.base.level = level;
	role.base.gpte_is_8_bytes = true;
	role.base.direct = false;
	role.base.ad_disabled = !accessed_dirty;
@@ -5039,16 +5039,17 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
			     bool accessed_dirty, gpa_t new_eptp)
{
	struct kvm_mmu *context = vcpu->arch.mmu;
	u8 level = vmx_eptp_page_walk_level(new_eptp);
	union kvm_mmu_role new_role =
		kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
						   execonly);
						   execonly, level);

	__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);

	if (new_role.as_u64 == context->mmu_role.as_u64)
		return;

	context->shadow_root_level = PT64_ROOT_4LEVEL;
	context->shadow_root_level = level;

	context->nx = true;
	context->ept_ad = accessed_dirty;
@@ -5057,7 +5058,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
	context->sync_page = ept_sync_page;
	context->invlpg = ept_invlpg;
	context->update_pte = ept_update_pte;
	context->root_level = PT64_ROOT_4LEVEL;
	context->root_level = level;
	context->direct_map = false;
	context->mmu_role.as_u64 = new_role.as_u64;

+1 −1
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@
	#define PT_GUEST_ACCESSED_SHIFT 8
	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 4
	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else
	#error Invalid PTTYPE value
#endif
+17 −4
Original line number Diff line number Diff line
@@ -2582,9 +2582,19 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
		return false;
	}

	/* only 4 levels page-walk length are valid */
	if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
	/* Page-walk levels validity. */
	switch (address & VMX_EPTP_PWL_MASK) {
	case VMX_EPTP_PWL_5:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
			return false;
		break;
	case VMX_EPTP_PWL_4:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
			return false;
		break;
	default:
		return false;
	}

	/* Reserved bits should not be set */
	if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
@@ -6119,8 +6129,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
		/* nested EPT: emulate EPT also to L1 */
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_EPT;
		msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
			 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
		msrs->ept_caps =
			VMX_EPT_PAGE_WALK_4_BIT |
			VMX_EPT_PAGE_WALK_5_BIT |
			VMX_EPTP_WB_BIT |
			VMX_EPT_INVEPT_BIT;
		if (cpu_has_vmx_ept_execute_only())
			msrs->ept_caps |=
				VMX_EPT_EXECUTE_ONLY_BIT;
+1 −2
Original line number Diff line number Diff line
@@ -2985,9 +2985,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)

static int get_ept_level(struct kvm_vcpu *vcpu)
{
	/* Nested EPT currently only supports 4-level walks. */
	if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
		return 4;
		return vmx_eptp_page_walk_level(nested_ept_get_cr3(vcpu));
	if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
		return 5;
	return 4;