Commit 4a5fd419 authored by Paolo Bonzini

Merge tag 'kvm-x86-svm-6.4' of https://github.com/kvm-x86/linux into HEAD

KVM SVM changes for 6.4:

 - Add support for virtual NMIs

 - Fixes for edge cases related to virtual interrupts
parents c21775ae c0d0ce9b

arch/x86/include/asm/cpufeatures.h (+4 −4)
@@ -226,10 +226,9 @@
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI		( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT			( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID		( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 1) /* Intel FlexPriority */
+#define X86_FEATURE_EPT			( 8*32+ 2) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID		( 8*32+ 3) /* Intel Virtual Processor ID */
 
 #define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
@@ -370,6 +369,7 @@
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
 #define X86_FEATURE_X2AVIC		(15*32+18) /* Virtual x2apic */
 #define X86_FEATURE_V_SPEC_CTRL		(15*32+20) /* Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI		(15*32+25) /* Virtual NMI */
 #define X86_FEATURE_SVME_ADDR_CHK	(15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
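
Word 15 holds the SVM feature leaf (CPUID 0x8000000A, EDX), so the new define corresponds to EDX bit 25. A hedged sketch of gating on the new bit with the standard cpufeature helpers (the module-parameter plumbing in svm.c is not shown in this diff):

/* Sketch: report whether the vNMI hardware feature can be used. */
static bool vnmi_supported(void)
{
	/* X86_FEATURE_VNMI: word 15 (CPUID 0x8000000A:EDX), bit 25. */
	return boot_cpu_has(X86_FEATURE_VNMI);
}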

arch/x86/include/asm/kvm-x86-ops.h (+2 −0)
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
 KVM_X86_OP(patch_hypercall)
 KVM_X86_OP(inject_irq)
 KVM_X86_OP(inject_nmi)
+KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
+KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
 KVM_X86_OP(inject_exception)
 KVM_X86_OP(cancel_injection)
 KVM_X86_OP(interrupt_allowed)
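
KVM_X86_OP_OPTIONAL_RET0() hooks get a static-call stub that returns 0 when the vendor module leaves them unimplemented (VMX here), so common code can call them unconditionally and treat false as "no hardware vNMI". A rough sketch of the resulting call pattern in common x86 code (function name hypothetical; the real logic lives in x86.c's NMI processing):

/* Sketch: queue an NMI, preferring the hardware vNMI slot. */
static void example_pend_nmi(struct kvm_vcpu *vcpu)
{
	/*
	 * If set_vnmi_pending() accepts the NMI, hardware will deliver
	 * it.  The RET0 stub (VMX, or vNMI disabled) returns false, so
	 * KVM falls back to software tracking via nmi_pending.
	 */
	if (static_call(kvm_x86_set_vnmi_pending)(vcpu))
		return;

	vcpu->arch.nmi_pending++;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}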

arch/x86/include/asm/kvm_host.h (+10 −1)
@@ -874,7 +874,8 @@ struct kvm_vcpu_arch {
 	u64 tsc_scaling_ratio; /* current scaling ratio */
 
 	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
-	unsigned nmi_pending; /* NMI queued after currently running handler */
+	/* Number of NMIs pending injection, not including hardware vNMIs. */
+	unsigned int nmi_pending;
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
 	bool smi_pending;    /* SMI queued after currently running handler */
 	u8 handling_intr_from_guest;
@@ -1619,6 +1620,13 @@ struct kvm_x86_ops {
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
+	/* Whether or not a virtual NMI is pending in hardware. */
+	bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
+	/*
+	 * Attempt to pend a virtual NMI in hardware.  Returns %true on success
+	 * to allow using static_call_ret0 as the fallback.
+	 */
+	bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -1999,6 +2007,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
 void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
+int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
 
 void kvm_update_dr7(struct kvm_vcpu *vcpu);
 
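Note the updated comment on nmi_pending: it counts only software-tracked NMIs and excludes one that may already be pending in hardware. A caller that wants the architectural count must therefore combine both sources, which is presumably what the new kvm_get_nr_pending_nmis() helper does. A hedged sketch of its x86.c body under that reading (the actual definition is outside this diff):

/* Sketch: total NMIs awaiting injection, software- plus hardware-tracked. */
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.nmi_pending +
	       static_call(kvm_x86_is_vnmi_pending)(vcpu);
}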

arch/x86/include/asm/svm.h (+9 −1)
@@ -183,6 +183,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_GIF_SHIFT 9
 #define V_GIF_MASK (1 << V_GIF_SHIFT)
 
+#define V_NMI_PENDING_SHIFT 11
+#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
+
+#define V_NMI_BLOCKING_SHIFT 12
+#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
+
 #define V_INTR_PRIO_SHIFT 16
 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
 
@@ -197,6 +203,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define V_GIF_ENABLE_SHIFT 25
 #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
 
+#define V_NMI_ENABLE_SHIFT 26
+#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
+
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
@@ -278,7 +287,6 @@ static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_
 static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
 
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
-#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
 
 struct vmcb_seg {
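
Taken together: V_NMI_ENABLE (int_ctl bit 26) turns the feature on for a VMCB, the hypervisor requests delivery by setting V_NMI_PENDING (bit 11), and hardware sets V_NMI_BLOCKING (bit 12) while the guest is inside its NMI handler. A simplified sketch of the SVM callbacks built on these bits (hedged; the real svm.c versions also handle vmcb01/vmcb02 selection and the enable check for nested guests):

static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
}

static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Hardware can track only one pending vNMI at a time. */
	if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
		return false;

	svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
	vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
	return true;
}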

arch/x86/kvm/svm/nested.c (+75 −16)
@@ -139,12 +139,17 @@ void recalc_intercepts(struct vcpu_svm *svm)
 
 	if (g->int_ctl & V_INTR_MASKING_MASK) {
 		/*
-		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
-		 * does not affect any interrupt we may want to inject;
-		 * therefore, writes to CR8 are irrelevant to L0, as are
-		 * interrupt window vmexits.
+		 * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
+		 * disable intercept of CR8 writes as L2's CR8 does not affect
+		 * any interrupt KVM may want to inject.
+		 *
+		 * Similarly, disable intercept of virtual interrupts (used to
+		 * detect interrupt windows) if the saved RFLAGS.IF is '0', as
+		 * the effective RFLAGS.IF for L1 interrupts will never be set
+		 * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
 		 */
 		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
-		vmcb_clr_intercept(c, INTERCEPT_VINTR);
+		if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
+			vmcb_clr_intercept(c, INTERCEPT_VINTR);
 	}

@@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
 	if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
 		return false;
 
+	if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
+	       !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
+		return false;
+	}
+
 	return true;
 }

@@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
 
 	/* Only a few fields of int_ctl are written by the processor.  */
 	mask = V_IRQ_MASK | V_TPR_MASK;
-	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
-	    svm_is_intercept(svm, INTERCEPT_VINTR)) {
-		/*
-		 * In order to request an interrupt window, L0 is usurping
-		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
-		 * even if it was clear in L1's VMCB.  Restoring it would be
-		 * wrong.  However, in this case V_IRQ will remain true until
-		 * interrupt_window_interception calls svm_clear_vintr and
-		 * restores int_ctl.  We can just leave it aside.
-		 */
+
+	/*
+	 * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
+	 * virtual interrupts in order to request an interrupt window, as KVM
+	 * has usurped vmcb02's int_ctl.  If an interrupt window opens before
+	 * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
+	 * If no window opens, V_IRQ will be correctly preserved in vmcb12's
+	 * int_ctl (because it was never recognized while L2 was running).
+	 */
+	if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
+	    !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
 		mask &= ~V_IRQ_MASK;
-	}
 
 	if (nested_vgif_enabled(svm))
 		mask |= V_GIF_MASK;
 
+	if (nested_vnmi_enabled(svm))
+		mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
+
 	svm->nested.ctl.int_ctl        &= ~mask;
 	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
 }
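
nested_vnmi_enabled() is defined elsewhere in the series and is not part of this hunk. A plausible shape, hedged as an assumption (the vnmi_enabled field name is inferred, not shown in this diff), is that vNMI int_ctl bits are synced back to vmcb12 only when L1 has the feature exposed and enabled:

/* Sketch: does L1 have vNMI exposed and enabled for L2? */
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
	/* vnmi_enabled: assumed per-vCPU flag tracking vNMI exposure to L1. */
	return svm->vnmi_enabled &&
	       (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}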
@@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 	else
 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
 
+	if (vnmi) {
+		if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
+			svm->vcpu.arch.nmi_pending++;
+			kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+		}
+		if (nested_vnmi_enabled(svm))
+			int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
+						V_NMI_ENABLE_MASK |
+						V_NMI_BLOCKING_MASK);
+	}
+
 	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
 	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
+	/*
+	 * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
+	 *
+	 * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR:  If L1 doesn't
+	 * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
+	 * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
+	 * virtual interrupt masking).  Raise KVM_REQ_EVENT to ensure that
+	 * KVM re-requests an interrupt window if necessary, which implicitly
+	 * copies these bits from vmcb02 to vmcb01.
+	 *
+	 * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
+	 * is stored in vmcb02, but its value doesn't need to be copied from/to
+	 * vmcb01 because it is copied from/to the virtual APIC's TPR register
+	 * on each VM entry/exit.
+	 *
+	 * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
+	 * V_GIF.  However, GIF is architecturally cleared on each VM exit, so
+	 * there is no need to copy V_GIF from vmcb02 to vmcb01.
+	 */
+	if (!nested_exit_on_intr(svm))
+		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
 	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
 		svm_copy_lbrs(vmcb12, vmcb02);
 		svm_update_lbrv(vcpu);
@@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 		svm_update_lbrv(vcpu);
 	}
 
+	if (vnmi) {
+		if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
+			vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
+		else
+			vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
+
+		if (vcpu->arch.nmi_pending) {
+			vcpu->arch.nmi_pending--;
+			vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
+		} else {
+			vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
+		}
+	}
+
 	/*
 	 * On vmexit the GIF is set to false and
 	 * no event can be injected in L1.