Commit 4a5fd419 authored by Paolo Bonzini

Merge tag 'kvm-x86-svm-6.4' of https://github.com/kvm-x86/linux into HEAD

KVM SVM changes for 6.4:

 - Add support for virtual NMIs

 - Fixes for edge cases related to virtual interrupts
parents c21775ae c0d0ce9b

arch/x86/include/asm/cpufeatures.h (+4 −4)
@@ -226,10 +226,9 @@
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI		( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT			( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID		( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 1) /* Intel FlexPriority */
+#define X86_FEATURE_EPT			( 8*32+ 2) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID		( 8*32+ 3) /* Intel Virtual Processor ID */
 
 #define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
@@ -370,6 +369,7 @@
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
 #define X86_FEATURE_X2AVIC		(15*32+18) /* Virtual x2apic */
 #define X86_FEATURE_V_SPEC_CTRL		(15*32+20) /* Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI		(15*32+25) /* Virtual NMI */
 #define X86_FEATURE_SVME_ADDR_CHK	(15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
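
Word 15 holds the SVM feature leaf (CPUID 0x8000000A, EDX), so the new define corresponds to EDX bit 25. A hedged sketch of gating on the new bit with the standard cpufeature helpers (the module-parameter plumbing in svm.c is not shown in this diff):

/* Sketch: report whether the vNMI hardware feature can be used. */
static bool vnmi_supported(void)
{
	/* X86_FEATURE_VNMI: word 15 (CPUID 0x8000000A:EDX), bit 25. */
	return boot_cpu_has(X86_FEATURE_VNMI);
}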

arch/x86/include/asm/kvm-x86-ops.h (+2 −0)
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
 KVM_X86_OP(patch_hypercall)
 KVM_X86_OP(inject_irq)
 KVM_X86_OP(inject_nmi)
+KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
+KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
 KVM_X86_OP(inject_exception)
 KVM_X86_OP(cancel_injection)
 KVM_X86_OP(interrupt_allowed)
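
KVM_X86_OP_OPTIONAL_RET0() hooks get a static-call stub that returns 0 when the vendor module leaves them unimplemented (VMX here), so common code can call them unconditionally and treat false as "no hardware vNMI". A rough sketch of the resulting call pattern in common x86 code (function name hypothetical; the real logic lives in x86.c's NMI processing):

/* Sketch: queue an NMI, preferring the hardware vNMI slot. */
static void example_pend_nmi(struct kvm_vcpu *vcpu)
{
	/*
	 * If set_vnmi_pending() accepts the NMI, hardware will deliver
	 * it.  The RET0 stub (VMX, or vNMI disabled) returns false, so
	 * KVM falls back to software tracking via nmi_pending.
	 */
	if (static_call(kvm_x86_set_vnmi_pending)(vcpu))
		return;

	vcpu->arch.nmi_pending++;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}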

arch/x86/include/asm/kvm_host.h (+10 −1)
@@ -874,7 +874,8 @@ struct kvm_vcpu_arch {
 	u64 tsc_scaling_ratio; /* current scaling ratio */
 
 	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
-	unsigned nmi_pending; /* NMI queued after currently running handler */
+	/* Number of NMIs pending injection, not including hardware vNMIs. */
+	unsigned int nmi_pending;
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
 	bool smi_pending;    /* SMI queued after currently running handler */
 	u8 handling_intr_from_guest;
@@ -1619,6 +1620,13 @@ struct kvm_x86_ops {
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
+	/* Whether or not a virtual NMI is pending in hardware. */
+	bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
+	/*
+	 * Attempt to pend a virtual NMI in hardware.  Returns %true on success
+	 * to allow using static_call_ret0 as the fallback.
+	 */
+	bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -1999,6 +2007,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
 void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
 
 void kvm_update_dr7(struct kvm_vcpu *vcpu);
 
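Note the updated comment on nmi_pending: it counts only software-tracked NMIs and excludes one that may already be pending in hardware. A caller that wants the architectural count must therefore combine both sources, which is presumably what the new kvm_get_nr_pending_nmis() helper does. A hedged sketch of its x86.c body under that reading (the actual definition is outside this diff):

/* Sketch: total NMIs awaiting injection, software- plus hardware-tracked. */
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.nmi_pending +
	       static_call(kvm_x86_is_vnmi_pending)(vcpu);
}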

arch/x86/include/asm/svm.h (+9 −1)
@@ -183,6 +183,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_GIF_SHIFT 9
 #define V_GIF_MASK (1 << V_GIF_SHIFT)
 
+#define V_NMI_PENDING_SHIFT 11
+#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
+
+#define V_NMI_BLOCKING_SHIFT 12
+#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
+
 #define V_INTR_PRIO_SHIFT 16
 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
 
@@ -197,6 +203,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_GIF_ENABLE_SHIFT 25
 #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
 
+#define V_NMI_ENABLE_SHIFT 26
+#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
+
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
@@ -278,7 +287,6 @@ static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_
 static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
 
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
-#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
 
 struct vmcb_seg {
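
Taken together: V_NMI_ENABLE (int_ctl bit 26) turns the feature on for a VMCB, the hypervisor requests delivery by setting V_NMI_PENDING (bit 11), and hardware sets V_NMI_BLOCKING (bit 12) while the guest is inside its NMI handler. A simplified sketch of the SVM callbacks built on these bits (hedged; the real svm.c versions also handle vmcb01/vmcb02 selection and the enable check for nested guests):

static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
}

static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Hardware can track only one pending vNMI at a time. */
	if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
		return false;

	svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
	vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
	return true;
}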

arch/x86/kvm/svm/nested.c (+75 −16)
@@ -139,12 +139,17 @@ void recalc_intercepts(struct vcpu_svm *svm)
 
 	if (g->int_ctl & V_INTR_MASKING_MASK) {
 		/*
-		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
-		 * does not affect any interrupt we may want to inject;
-		 * therefore, writes to CR8 are irrelevant to L0, as are
-		 * interrupt window vmexits.
+		 * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
+		 * disable intercept of CR8 writes as L2's CR8 does not affect
+		 * any interrupt KVM may want to inject.
+		 *
+		 * Similarly, disable intercept of virtual interrupts (used to
+		 * detect interrupt windows) if the saved RFLAGS.IF is '0', as
+		 * the effective RFLAGS.IF for L1 interrupts will never be set
+		 * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
 		 */
 		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
-		vmcb_clr_intercept(c, INTERCEPT_VINTR);
+		if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
+			vmcb_clr_intercept(c, INTERCEPT_VINTR);
 	}

@@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
 	if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
 		return false;
 
+	if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
+	       !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
+		return false;
+	}
+
 	return true;
 }

@@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
 
 	/* Only a few fields of int_ctl are written by the processor.  */
 	mask = V_IRQ_MASK | V_TPR_MASK;
-	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
-	    svm_is_intercept(svm, INTERCEPT_VINTR)) {
-		/*
-		 * In order to request an interrupt window, L0 is usurping
-		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
-		 * even if it was clear in L1's VMCB.  Restoring it would be
-		 * wrong.  However, in this case V_IRQ will remain true until
-		 * interrupt_window_interception calls svm_clear_vintr and
-		 * restores int_ctl.  We can just leave it aside.
-		 */
+
+	/*
+	 * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
+	 * virtual interrupts in order to request an interrupt window, as KVM
+	 * has usurped vmcb02's int_ctl.  If an interrupt window opens before
+	 * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
+	 * If no window opens, V_IRQ will be correctly preserved in vmcb12's
+	 * int_ctl (because it was never recognized while L2 was running).
+	 */
+	if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
+	    !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
 		mask &= ~V_IRQ_MASK;
-	}
 
 	if (nested_vgif_enabled(svm))
 		mask |= V_GIF_MASK;
 
+	if (nested_vnmi_enabled(svm))
+		mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
+
 	svm->nested.ctl.int_ctl        &= ~mask;
 	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
 }
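
nested_vnmi_enabled() is defined elsewhere in the series and is not part of this hunk. A plausible shape, hedged as an assumption (the vnmi_enabled field name is inferred, not shown in this diff), is that vNMI int_ctl bits are synced back to vmcb12 only when L1 has the feature exposed and enabled:

/* Sketch: does L1 have vNMI exposed and enabled for L2? */
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
	/* vnmi_enabled: assumed per-vCPU flag tracking vNMI exposure to L1. */
	return svm->vnmi_enabled &&
	       (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}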
@@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	else
 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
 
+	if (vnmi) {
+		if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
+			svm->vcpu.arch.nmi_pending++;
+			kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+		}
+		if (nested_vnmi_enabled(svm))
+			int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
+						V_NMI_ENABLE_MASK |
+						V_NMI_BLOCKING_MASK);
+	}
+
 	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
 	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
+	/*
+	 * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
+	 *
+	 * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR:  If L1 doesn't
+	 * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
+	 * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
+	 * virtual interrupt masking).  Raise KVM_REQ_EVENT to ensure that
+	 * KVM re-requests an interrupt window if necessary, which implicitly
+	 * copies these bits from vmcb02 to vmcb01.
+	 *
+	 * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
+	 * is stored in vmcb02, but its value doesn't need to be copied from/to
+	 * vmcb01 because it is copied from/to the virtual APIC's TPR register
+	 * on each VM entry/exit.
+	 *
+	 * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
+	 * V_GIF.  However, GIF is architecturally cleared on each VM exit, so
+	 * there is no need to copy V_GIF from vmcb02 to vmcb01.
+	 */
+	if (!nested_exit_on_intr(svm))
+		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
 	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
 		svm_copy_lbrs(vmcb12, vmcb02);
 		svm_update_lbrv(vcpu);
@@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 		svm_update_lbrv(vcpu);
 	}
 
+	if (vnmi) {
+		if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
+			vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
+		else
+			vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
+
+		if (vcpu->arch.nmi_pending) {
+			vcpu->arch.nmi_pending--;
+			vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
+		} else {
+			vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
+		}
+	}
+
 	/*
 	 * On vmexit the GIF is set to false and
 	 * no event can be injected in L1.