Commit bd7fe98b authored by Paolo Bonzini

Merge tag 'kvm-x86-svm-6.6' of https://github.com/kvm-x86/linux into HEAD

KVM: x86: SVM changes for 6.6:

 - Add support for SEV-ES DebugSwap, i.e. allow SEV-ES guests to use debug
   registers and generate/handle #DBs

 - Clean up LBR virtualization code

 - Fix a bug where KVM fails to set the target pCPU during an IRTE update

 - Fix fatal bugs in SEV-ES intrahost migration

 - Fix a bug where the recent (architecturally correct) change to reinject
   #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it)
parents 755e732d 80d0f521
+1 −0
@@ -438,6 +438,7 @@
#define X86_FEATURE_SEV_ES		(19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* AMD SEV-ES full debug state swap support */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* "" No Nested Data Breakpoints */
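
X86_FEATURE_DEBUG_SWAP lands in word 19, i.e. CPUID leaf 0x8000001F EAX, bit 14, next to the existing SEV-ES bit. A minimal standalone userspace sketch (not part of the commit) that probes those bits, assuming GCC/clang's <cpuid.h>:

/* cpuid_sketch.c — probe the AMD memory-encryption feature leaf from userspace. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID Fn8000_001F reports the AMD memory encryption features (word 19 above) */
	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000001f not supported");
		return 1;
	}
	printf("SEV-ES:    %s\n", (eax & (1u << 3))  ? "yes" : "no");
	printf("DebugSwap: %s\n", (eax & (1u << 14)) ? "yes" : "no");
	return 0;
}
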
+3 −2
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

#define SVM_SEV_FEAT_DEBUG_SWAP                        BIT(5)

struct vmcb_seg {
	u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
	u64 last_excp_from;
	u64 last_excp_to;
	u8 reserved_0x298[72];
	u32 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
	u64 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;

/* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
} __packed;


#define EXPECTED_VMCB_SAVE_AREA_SIZE		740
#define EXPECTED_VMCB_SAVE_AREA_SIZE		744
#define EXPECTED_GHCB_SAVE_AREA_SIZE		1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE		1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
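
The spec_ctrl field widens from u32 to u64 inside a __packed structure, so the save area grows by exactly 4 bytes and EXPECTED_VMCB_SAVE_AREA_SIZE moves from 740 to 744; the EXPECTED_* constants feed compile-time size checks elsewhere in this header. A toy sketch (invented field layout and sizes) of that static size-check pattern:

/* size_check_sketch.c — illustrative only; the layout and sizes are made up. */
#include <stdint.h>

struct save_area_old {
	uint8_t  reserved[8];
	uint32_t spec_ctrl;          /* 4-byte field */
} __attribute__((packed));

struct save_area_new {
	uint8_t  reserved[8];
	uint64_t spec_ctrl;          /* widened to 8 bytes */
} __attribute__((packed));

/* widening one packed field grows the struct by exactly 4 bytes */
_Static_assert(sizeof(struct save_area_old) == 12, "old layout changed");
_Static_assert(sizeof(struct save_area_new) == 16, "new layout changed");

int main(void) { return 0; }
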
+51 −8
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	u64 entry;

	/**
	 * In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);

	/*
	 * Update the target pCPU for IOMMU doorbells if the vCPU is running.
	 * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
	 * will update the pCPU info when the vCPU is awakened and/or scheduled in.
	 * See also avic_vcpu_load().
	 */
	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
				    true, pi->ir_data);

	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
@@ -986,10 +999,11 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_held(&svm->ir_list_lock);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

@@ -997,26 +1011,23 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;
		return 0;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
			return ret;
	}
	return 0;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned long flags;

	lockdep_assert_preemption_disabled();

@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	if (kvm_vcpu_is_blocking(vcpu))
		return;

	/*
	 * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
	 * _currently_ have assigned devices, as that can change.  Holding
	 * ir_list_lock ensures that either svm_ir_list_add() will consume
	 * up-to-date entry information, or that this task will wait until
	 * svm_ir_list_add() completes to set the new target pCPU.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);

	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned long flags;

	lockdep_assert_preemption_disabled();

	/*
	 * Note, reading the Physical ID entry outside of ir_list_lock is safe
	 * as only the pCPU that has loaded (or is loading) the vCPU is allowed
	 * to modify the entry, and preemption is disabled.  I.e. the vCPU
	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
	 * recursively.
	 */
	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
		return;

	/*
	 * Take and hold the per-vCPU interrupt remapping lock while updating
	 * the Physical ID entry even though the lock doesn't protect against
	 * multiple writers (see above).  Holding ir_list_lock ensures that
	 * either svm_ir_list_add() will consume up-to-date entry information,
	 * or that this task will wait until svm_ir_list_add() completes to
	 * mark the vCPU as not running.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);

	spin_unlock_irqrestore(&svm->ir_list_lock, flags);

}

void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
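
The avic.c changes hinge on one invariant: whoever holds ir_list_lock sees a consistent pair of (Physical ID entry, IRTE list), so a newly added IRTE either picks up the current pCPU directly or gets retargeted by avic_vcpu_load(). A rough userspace analogue of that pattern (hypothetical names; a pthread mutex stands in for the kernel spinlock; build with cc -pthread):

/* ir_lock_sketch.c — rough analogue of the ir_list_lock pattern above. */
#include <pthread.h>
#include <stdio.h>

#define MAX_ENTRIES 4

static pthread_mutex_t ir_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int running_cpu = -1;          /* stand-in for the AVIC Physical ID entry  */
static int irte_target[MAX_ENTRIES];  /* stand-in for the per-device IRTE targets */
static int nr_entries;

/* analogue of svm_ir_list_add(): a new entry picks up the current pCPU, if any */
static void ir_list_add(void)
{
	pthread_mutex_lock(&ir_list_lock);
	irte_target[nr_entries++] = running_cpu;
	pthread_mutex_unlock(&ir_list_lock);
}

/* analogue of avic_vcpu_load(): publish the new pCPU and retarget every entry */
static void vcpu_load(int cpu)
{
	pthread_mutex_lock(&ir_list_lock);
	running_cpu = cpu;
	for (int i = 0; i < nr_entries; i++)
		irte_target[i] = cpu;
	pthread_mutex_unlock(&ir_list_lock);
}

int main(void)
{
	ir_list_add();     /* device attached while the vCPU is not running */
	vcpu_load(2);      /* vCPU scheduled in on pCPU 2                   */
	printf("entry 0 targets pCPU %d\n", irte_target[0]);  /* prints 2   */
	return 0;
}

Whichever of the two calls runs first, the entry ends up targeting pCPU 2, which is the property the lock gives the kernel code.
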
+83 −17
@@ -23,6 +23,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include <asm/debugreg.h>

#include "mmu.h"
#include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);

/* enable/disable SEV-ES DebugSwap support */
static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#define sev_es_debug_swap_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
	save->xss  = svm->vcpu.arch.ia32_xss;
	save->dr6  = svm->vcpu.arch.dr6;

	if (sev_es_debug_swap_enabled)
		save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;

	pr_debug("Virtual Machine Save Area (VMSA):\n");
	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);

@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	if (vcpu->guest_debug) {
		pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
		return -EINVAL;
	}

	/* Perform some pre-encryption checks against the VMSA */
	ret = sev_es_sync_vmsa(svm);
	if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
		 * Note, the source is not required to have the same number of
		 * vCPUs as the destination when migrating a vanilla SEV VM.
		 */
		src_vcpu = kvm_get_vcpu(dst_kvm, i);
		src_vcpu = kvm_get_vcpu(src_kvm, i);
		src_svm = to_svm(src_vcpu);

		/*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
	bool sev_es_supported = false;
	bool sev_supported = false;

	if (!sev_enabled || !npt_enabled)
	if (!sev_enabled || !npt_enabled || !nrips)
		goto out;

	/*
@@ -2256,6 +2270,9 @@ void __init sev_hardware_setup(void)

	sev_enabled = sev_supported;
	sev_es_enabled = sev_es_supported;
	if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
	    !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
		sev_es_debug_swap_enabled = false;
#endif
}

@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
					    svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
		++vcpu->stat.nmi_window_exits;
		svm->nmi_masked = false;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		ret = 1;
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)

static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,8 +2973,11 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 * address since hardware will access it using the guest key.  Note,
	 * the VMSA will be NULL if this vCPU is the destination for intrahost
	 * migration, and will be copied later.
	 */
	if (svm->sev_es.vmsa)
		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);
	vmcb->control.intercepts[INTERCEPT_DR] = 0;
	if (!sev_es_debug_swap_enabled) {
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
		recalc_intercepts(svm);
	} else {
		/*
		 * Disable #DB intercept iff DebugSwap is enabled.  KVM doesn't
		 * allow debugging SEV-ES guests, and enables DebugSwap iff
		 * NO_NESTED_DATA_BP is supported, so there's no reason to
		 * intercept #DB when DebugSwap is enabled.  For simplicity
		 * with respect to guest debug, intercept #DB for other VMs
		 * even if NO_NESTED_DATA_BP is supported, i.e. even if the
		 * guest can't DoS the CPU with infinite #DB vectoring.
		 */
		clr_exception_intercept(svm, DB_VECTOR);
	}

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
	clr_exception_intercept(svm, UD_VECTOR);

	/*
	 * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
	 * KVM can't decrypt guest memory to decode the faulting instruction.
	 */
	clr_exception_intercept(svm, GP_VECTOR);

	if (sev_es_guest(svm->vcpu.kvm))
		sev_es_init_vmcb(svm);
}
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
	/*
	 * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
	 * of which one step is to perform a VMLOAD.  KVM performs the
	 * corresponding VMSAVE in svm_prepare_guest_switch for both
	 * traditional and SEV-ES guests.
	 * All host state for SEV-ES guests is categorized into three swap types
	 * based on how it is handled by hardware during a world switch:
	 *
	 * A: VMRUN:   Host state saved in host save area
	 *    VMEXIT:  Host state loaded from host save area
	 *
	 * B: VMRUN:   Host state _NOT_ saved in host save area
	 *    VMEXIT:  Host state loaded from host save area
	 *
	 * C: VMRUN:   Host state _NOT_ saved in host save area
	 *    VMEXIT:  Host state initialized to default(reset) values
	 *
	 * Manually save type-B state, i.e. state that is loaded by VMEXIT but
	 * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
	 * by common SVM code).
	 */

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;

	/*
	 * If DebugSwap is enabled, debug registers are loaded but NOT saved by
	 * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
	 * saves and loads debug registers (Type-A).
	 */
	if (sev_es_debug_swap_enabled) {
		hostsa->dr0 = native_get_debugreg(0);
		hostsa->dr1 = native_get_debugreg(1);
		hostsa->dr2 = native_get_debugreg(2);
		hostsa->dr3 = native_get_debugreg(3);
		hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
		hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
		hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
		hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
	}
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
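
The debug_swap knob added above is a read-only (0444) module parameter of kvm_amd (sev.c is built into that module), and sev_hardware_setup() may silently clear it when SEV-ES, DEBUG_SWAP, or NO_NESTED_DATA_BP support is missing. A small sketch, assuming the usual /sys/module/<module>/parameters/<name> layout, that reads back the effective value:

/* debug_swap_check.c — read the effective kvm_amd.debug_swap value from sysfs. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/module/kvm_amd/parameters/debug_swap", "r");
	int c;

	if (!f) {
		perror("kvm_amd.debug_swap (module not loaded or parameter missing?)");
		return 1;
	}
	c = fgetc(f);   /* bool parameters read back as 'Y' or 'N' */
	printf("SEV-ES DebugSwap: %s\n", c == 'Y' ? "enabled" : "disabled");
	fclose(f);
	return 0;
}
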
+112 −67
@@ -203,7 +203,7 @@ static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable/disable Next RIP Save */
static int nrips = true;
int nrips = true;
module_param(nrips, int, 0444);

/* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +365,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;

}
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
					void *insn, int insn_len);

static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
					   bool commit_side_effects)
@@ -385,6 +387,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
	}

	if (!svm->next_rip) {
		/*
		 * FIXME: Drop this when kvm_emulate_instruction() does the
		 * right thing and treats "can't emulate" as outright failure
		 * for EMULTYPE_SKIP.
		 */
		if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
			return 0;

		if (unlikely(!commit_side_effects))
			old_rflags = svm->vmcb->save.rflags;

@@ -677,6 +687,39 @@ static int svm_cpu_init(int cpu)

}

static void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);

	recalc_intercepts(svm);
}

static void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.intercepts[INTERCEPT_DR] = 0;

	recalc_intercepts(svm);
}

static int direct_access_msr_slot(u32 msr)
{
	u32 i;
@@ -947,50 +990,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
		svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}

static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
	/*
	 * If the LBR virtualization is disabled, the LBR msrs are always
	 * kept in the vmcb01 to avoid copying them on nested guest entries.
	 *
	 * If nested, and the LBR virtualization is enabled/disabled, the msrs
	 * are moved between the vmcb01 and vmcb02 as needed.
	 * If LBR virtualization is disabled, the LBR MSRs are always kept in
	 * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
	 * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
	 */
	struct vmcb *vmcb =
		(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
			svm->vmcb : svm->vmcb01.ptr;

	switch (index) {
	case MSR_IA32_DEBUGCTLMSR:
		return vmcb->save.dbgctl;
	case MSR_IA32_LASTBRANCHFROMIP:
		return vmcb->save.br_from;
	case MSR_IA32_LASTBRANCHTOIP:
		return vmcb->save.br_to;
	case MSR_IA32_LASTINTFROMIP:
		return vmcb->save.last_excp_from;
	case MSR_IA32_LASTINTTOIP:
		return vmcb->save.last_excp_to;
	default:
		KVM_BUG(false, svm->vcpu.kvm,
			"%s: Unknown MSR 0x%x", __func__, index);
		return 0;
	}
	return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
								   svm->vmcb01.ptr;
}

void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
					   DEBUGCTLMSR_LBR;

	bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
				      LBR_CTL_ENABLE_MASK);

	if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
		if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
			enable_lbrv = true;
	bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
	bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
			   (is_guest_mode(vcpu) && svm->lbrv_enabled &&
			    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));

	if (enable_lbrv == current_enable_lbrv)
		return;
@@ -1201,10 +1218,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
	 * Guest access to VMware backdoor ports could legitimately
	 * trigger #GP because of TSS I/O permission bitmap.
	 * We intercept those #GP and allow access to them anyway
	 * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
	 * decrypt guest memory to decode the faulting instruction.
	 * as VMware does.
	 */
	if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
	if (enable_vmware_backdoor)
		set_exception_intercept(svm, GP_VECTOR);

	svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +1965,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
	if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
		return;

	get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,11 +2526,12 @@ static int iret_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON_ONCE(sev_es_guest(vcpu->kvm));

	++vcpu->stat.nmi_window_exits;
	svm->awaiting_iret_completion = true;

	svm_clr_iret_intercept(svm);
	if (!sev_es_guest(vcpu->kvm))
	svm->nmi_iret_rip = kvm_rip_read(vcpu);

	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2680,6 +2697,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
	unsigned long val;
	int err = 0;

	/*
	 * SEV-ES intercepts DR7 only to disable guest debugging, and the guest issues a VMGEXIT
	 * for DR7 writes only. KVM cannot change DR7 (it is always swapped as Type-A), so return early.
	 */
	if (sev_es_guest(vcpu->kvm))
		return 1;

	if (vcpu->guest_debug == 0) {
		/*
		 * No more DR vmexits; force a reload of the debug registers
@@ -2802,11 +2826,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		msr_info->data = svm->tsc_aux;
		break;
	case MSR_IA32_DEBUGCTLMSR:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
		break;
	case MSR_IA32_LASTBRANCHFROMIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
		break;
	case MSR_IA32_LASTBRANCHTOIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
		break;
	case MSR_IA32_LASTINTFROMIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
		break;
	case MSR_IA32_LASTINTTOIP:
		msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
		break;
	case MSR_VM_HSAVE_PA:
		msr_info->data = svm->nested.hsave_msr;
@@ -3037,13 +3069,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
		if (data & DEBUGCTL_RESERVED_BITS)
			return 1;

		if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
			svm->vmcb->save.dbgctl = data;
		else
			svm->vmcb01.ptr->save.dbgctl = data;

		svm_get_lbr_vmcb(svm)->save.dbgctl = data;
		svm_update_lbrv(vcpu);

		break;
	case MSR_VM_HSAVE_PA:
		/*
@@ -3769,6 +3796,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
	if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
		return; /* IRET will cause a vm exit */

	/*
	 * SEV-ES guests are responsible for signaling when a vCPU is ready to
	 * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
	 * KVM can't intercept and single-step IRET to detect when NMIs are
	 * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
	 *
	 * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
	 * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
	 * supported NAEs in the GHCB protocol.
	 */
	if (sev_es_guest(vcpu->kvm))
		return;

	if (!gif_set(svm)) {
		if (vgif)
			svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3958,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
	svm->soft_int_injected = false;

	/*
	 * If we've made progress since setting HF_IRET_MASK, we've
	 * If we've made progress since setting awaiting_iret_completion, we've
	 * executed an IRET and can allow NMI injection.
	 */
	if (svm->awaiting_iret_completion &&
	    (sev_es_guest(vcpu->kvm) ||
	     kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
	    kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
		svm->awaiting_iret_completion = false;
		svm->nmi_masked = false;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4651,15 +4690,24 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
	 * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
	 * decode garbage.
	 *
	 * Inject #UD if KVM reached this point without an instruction buffer.
	 * In practice, this path should never be hit by a well-behaved guest,
	 * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
	 * is still theoretically reachable, e.g. via unaccelerated fault-like
	 * AVIC access, and needs to be handled by KVM to avoid putting the
	 * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
	 * but its the least awful option given lack of insight into the guest.
	 * If KVM is NOT trying to simply skip an instruction, inject #UD if
	 * KVM reached this point without an instruction buffer.  In practice,
	 * this path should never be hit by a well-behaved guest, e.g. KVM
	 * doesn't intercept #UD or #GP for SEV guests, but this path is still
	 * theoretically reachable, e.g. via unaccelerated fault-like AVIC
	 * access, and needs to be handled by KVM to avoid putting the guest
	 * into an infinite loop.  Injecting #UD is somewhat arbitrary, but
	 * it's the least awful option given the lack of insight into the guest.
	 *
	 * If KVM is trying to skip an instruction, simply resume the guest.
	 * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
	 * will attempt to re-inject the INT3/INTO and skip the instruction.
	 * In that scenario, retrying the INT3/INTO and hoping the guest will
	 * make forward progress is the only option that has a chance of
	 * success (and in practice it will work the vast majority of the time).
	 */
	if (unlikely(!insn)) {
		if (!(emul_type & EMULTYPE_SKIP))
			kvm_queue_exception(vcpu, UD_VECTOR);
		return false;
	}
@@ -5112,9 +5160,11 @@ static __init int svm_hardware_setup(void)

	svm_adjust_mmio_mask();

	nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);

	/*
	 * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
	 * may be modified by svm_adjust_mmio_mask()).
	 * may be modified by svm_adjust_mmio_mask()), as well as nrips.
	 */
	sev_hardware_setup();

@@ -5126,11 +5176,6 @@ static __init int svm_hardware_setup(void)
			goto err;
	}

	if (nrips) {
		if (!boot_cpu_has(X86_FEATURE_NRIPS))
			nrips = false;
	}

	enable_apicv = avic = avic && avic_hardware_setup();

	if (!enable_apicv) {
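
One detail implied but not shown in this excerpt: nrips loses its static qualifier in svm.c so that sev_hardware_setup() can consume it, which presumably comes with a matching declaration in the SVM header. A hypothetical sketch of that companion line:

/* svm.h (presumed companion change, not visible in this excerpt) */
extern int nrips;
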