Commit bd7fe98b authored by Paolo Bonzini

Merge tag 'kvm-x86-svm-6.6' of https://github.com/kvm-x86/linux into HEAD

KVM: x86: SVM changes for 6.6:

 - Add support for SEV-ES DebugSwap, i.e. allow SEV-ES guests to use debug
   registers and generate/handle #DBs

 - Clean up LBR virtualization code

 - Fix a bug where KVM fails to set the target pCPU during an IRTE update

 - Fix fatal bugs in SEV-ES intrahost migration

 - Fix a bug where the recent (architecturally correct) change to reinject
   #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it)
parents 755e732d 80d0f521
+1 −0
@@ -438,6 +438,7 @@
#define X86_FEATURE_SEV_ES		(19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* AMD SEV-ES full debug state swap support */

/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* "" No Nested Data Breakpoints */
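
X86_FEATURE_DEBUG_SWAP lands in word 19, i.e. CPUID leaf 0x8000001F EAX, bit 14, next to the existing SEV-ES bit. A minimal standalone userspace sketch (not part of the commit) that probes those bits, assuming GCC/clang's <cpuid.h>:

/* cpuid_sketch.c — probe the AMD memory-encryption feature leaf from userspace. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID Fn8000_001F reports the AMD memory encryption features (word 19 above) */
	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000001f not supported");
		return 1;
	}
	printf("SEV-ES:    %s\n", (eax & (1u << 3))  ? "yes" : "no");
	printf("DebugSwap: %s\n", (eax & (1u << 14)) ? "yes" : "no");
	return 0;
}
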
+3 −2
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

#define SVM_SEV_FEAT_DEBUG_SWAP                        BIT(5)

struct vmcb_seg {
	u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
	u64 last_excp_from;
	u64 last_excp_to;
	u8 reserved_0x298[72];
	u32 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
	u64 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;

/* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
} __packed;


#define EXPECTED_VMCB_SAVE_AREA_SIZE		740
#define EXPECTED_VMCB_SAVE_AREA_SIZE		744
#define EXPECTED_GHCB_SAVE_AREA_SIZE		1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE		1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
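
The spec_ctrl field widens from u32 to u64 inside a __packed structure, so the save area grows by exactly 4 bytes and EXPECTED_VMCB_SAVE_AREA_SIZE moves from 740 to 744; the EXPECTED_* constants feed compile-time size checks elsewhere in this header. A toy sketch (invented field layout and sizes) of that static size-check pattern:

/* size_check_sketch.c — illustrative only; the layout and sizes are made up. */
#include <stdint.h>

struct save_area_old {
	uint8_t  reserved[8];
	uint32_t spec_ctrl;          /* 4-byte field */
} __attribute__((packed));

struct save_area_new {
	uint8_t  reserved[8];
	uint64_t spec_ctrl;          /* widened to 8 bytes */
} __attribute__((packed));

/* widening one packed field grows the struct by exactly 4 bytes */
_Static_assert(sizeof(struct save_area_old) == 12, "old layout changed");
_Static_assert(sizeof(struct save_area_new) == 16, "new layout changed");

int main(void) { return 0; }
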
+51 −8
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	u64 entry;

	/**
	 * In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);

	/*
	 * Update the target pCPU for IOMMU doorbells if the vCPU is running.
	 * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
	 * will update the pCPU info when the vCPU is awakened and/or scheduled in.
	 * See also avic_vcpu_load().
	 */
	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
				    true, pi->ir_data);

	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
@@ -986,10 +999,11 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_held(&svm->ir_list_lock);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

@@ -997,26 +1011,23 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;
		return 0;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
			return ret;
	}
	return 0;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned long flags;

	lockdep_assert_preemption_disabled();

@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	if (kvm_vcpu_is_blocking(vcpu))
		return;

	/*
	 * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
	 * _currently_ have assigned devices, as that can change.  Holding
	 * ir_list_lock ensures that either svm_ir_list_add() will consume
	 * up-to-date entry information, or that this task will wait until
	 * svm_ir_list_add() completes to set the new target pCPU.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);

	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);
	unsigned long flags;

	lockdep_assert_preemption_disabled();

	/*
	 * Note, reading the Physical ID entry outside of ir_list_lock is safe
	 * as only the pCPU that has loaded (or is loading) the vCPU is allowed
	 * to modify the entry, and preemption is disabled.  I.e. the vCPU
	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
	 * recursively.
	 */
	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
		return;

	/*
	 * Take and hold the per-vCPU interrupt remapping lock while updating
	 * the Physical ID entry even though the lock doesn't protect against
	 * multiple writers (see above).  Holding ir_list_lock ensures that
	 * either svm_ir_list_add() will consume up-to-date entry information,
	 * or that this task will wait until svm_ir_list_add() completes to
	 * mark the vCPU as not running.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);

	spin_unlock_irqrestore(&svm->ir_list_lock, flags);

}

void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
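
The avic.c changes hinge on one invariant: whoever holds ir_list_lock sees a consistent pair of (Physical ID entry, IRTE list), so a newly added IRTE either picks up the current pCPU directly or gets retargeted by avic_vcpu_load(). A rough userspace analogue of that pattern (hypothetical names; a pthread mutex stands in for the kernel spinlock; build with cc -pthread):

/* ir_lock_sketch.c — rough analogue of the ir_list_lock pattern above. */
#include <pthread.h>
#include <stdio.h>

#define MAX_ENTRIES 4

static pthread_mutex_t ir_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int running_cpu = -1;          /* stand-in for the AVIC Physical ID entry  */
static int irte_target[MAX_ENTRIES];  /* stand-in for the per-device IRTE targets */
static int nr_entries;

/* analogue of svm_ir_list_add(): a new entry picks up the current pCPU, if any */
static void ir_list_add(void)
{
	pthread_mutex_lock(&ir_list_lock);
	irte_target[nr_entries++] = running_cpu;
	pthread_mutex_unlock(&ir_list_lock);
}

/* analogue of avic_vcpu_load(): publish the new pCPU and retarget every entry */
static void vcpu_load(int cpu)
{
	pthread_mutex_lock(&ir_list_lock);
	running_cpu = cpu;
	for (int i = 0; i < nr_entries; i++)
		irte_target[i] = cpu;
	pthread_mutex_unlock(&ir_list_lock);
}

int main(void)
{
	ir_list_add();     /* device attached while the vCPU is not running */
	vcpu_load(2);      /* vCPU scheduled in on pCPU 2                   */
	printf("entry 0 targets pCPU %d\n", irte_target[0]);  /* prints 2   */
	return 0;
}

Whichever of the two calls runs first, the entry ends up targeting pCPU 2, which is the property the lock gives the kernel code.
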
+83 −17
@@ -23,6 +23,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include <asm/debugreg.h>

#include "mmu.h"
#include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);

/* enable/disable SEV-ES DebugSwap support */
static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#define sev_es_debug_swap_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
	save->xss  = svm->vcpu.arch.ia32_xss;
	save->dr6  = svm->vcpu.arch.dr6;

	if (sev_es_debug_swap_enabled)
		save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;

	pr_debug("Virtual Machine Save Area (VMSA):\n");
	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);

@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	if (vcpu->guest_debug) {
		pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
		return -EINVAL;
	}

	/* Perform some pre-encryption checks against the VMSA */
	ret = sev_es_sync_vmsa(svm);
	if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
		 * Note, the source is not required to have the same number of
		 * vCPUs as the destination when migrating a vanilla SEV VM.
		 */
		src_vcpu = kvm_get_vcpu(dst_kvm, i);
		src_vcpu = kvm_get_vcpu(src_kvm, i);
		src_svm = to_svm(src_vcpu);

		/*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
	bool sev_es_supported = false;
	bool sev_supported = false;

	if (!sev_enabled || !npt_enabled)
	if (!sev_enabled || !npt_enabled || !nrips)
		goto out;

	/*
@@ -2256,6 +2270,9 @@ void __init sev_hardware_setup(void)

	sev_enabled = sev_supported;
	sev_es_enabled = sev_es_supported;
	if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
	    !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
		sev_es_debug_swap_enabled = false;
#endif
}

@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
					    svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
		++vcpu->stat.nmi_window_exits;
		svm->nmi_masked = false;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		ret = 1;
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)

static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,8 +2973,11 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 * address since hardware will access it using the guest key.  Note,
	 * the VMSA will be NULL if this vCPU is the destination for intrahost
	 * migration, and will be copied later.
	 */
	if (svm->sev_es.vmsa)
		svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);
	vmcb->control.intercepts[INTERCEPT_DR] = 0;
	if (!sev_es_debug_swap_enabled) {
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
		recalc_intercepts(svm);
	} else {
		/*
		 * Disable #DB intercept iff DebugSwap is enabled.  KVM doesn't
		 * allow debugging SEV-ES guests, and enables DebugSwap iff
		 * NO_NESTED_DATA_BP is supported, so there's no reason to
		 * intercept #DB when DebugSwap is enabled.  For simplicity
		 * with respect to guest debug, intercept #DB for other VMs
		 * even if NO_NESTED_DATA_BP is supported, i.e. even if the
		 * guest can't DoS the CPU with infinite #DB vectoring.
		 */
		clr_exception_intercept(svm, DB_VECTOR);
	}

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
	clr_exception_intercept(svm, UD_VECTOR);

	/*
	 * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
	 * KVM can't decrypt guest memory to decode the faulting instruction.
	 */
	clr_exception_intercept(svm, GP_VECTOR);

	if (sev_es_guest(svm->vcpu.kvm))
		sev_es_init_vmcb(svm);
}
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
	/*
	 * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
	 * of which one step is to perform a VMLOAD.  KVM performs the
	 * corresponding VMSAVE in svm_prepare_guest_switch for both
	 * traditional and SEV-ES guests.
	 * All host state for SEV-ES guests is categorized into three swap types
	 * based on how it is handled by hardware during a world switch:
	 *
	 * A: VMRUN:   Host state saved in host save area
	 *    VMEXIT:  Host state loaded from host save area
	 *
	 * B: VMRUN:   Host state _NOT_ saved in host save area
	 *    VMEXIT:  Host state loaded from host save area
	 *
	 * C: VMRUN:   Host state _NOT_ saved in host save area
	 *    VMEXIT:  Host state initialized to default(reset) values
	 *
	 * Manually save type-B state, i.e. state that is loaded by VMEXIT but
	 * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
	 * by common SVM code).
	 */

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;

	/*
	 * If DebugSwap is enabled, debug registers are loaded but NOT saved by
	 * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
	 * saves and loads debug registers (Type-A).
	 */
	if (sev_es_debug_swap_enabled) {
		hostsa->dr0 = native_get_debugreg(0);
		hostsa->dr1 = native_get_debugreg(1);
		hostsa->dr2 = native_get_debugreg(2);
		hostsa->dr3 = native_get_debugreg(3);
		hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
		hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
		hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
		hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
	}
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
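
The debug_swap knob added above is a read-only (0444) module parameter of kvm_amd (sev.c is built into that module), and sev_hardware_setup() may silently clear it when SEV-ES, DEBUG_SWAP, or NO_NESTED_DATA_BP support is missing. A small sketch, assuming the usual /sys/module/<module>/parameters/<name> layout, that reads back the effective value:

/* debug_swap_check.c — read the effective kvm_amd.debug_swap value from sysfs. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/module/kvm_amd/parameters/debug_swap", "r");
	int c;

	if (!f) {
		perror("kvm_amd.debug_swap (module not loaded or parameter missing?)");
		return 1;
	}
	c = fgetc(f);   /* bool parameters read back as 'Y' or 'N' */
	printf("SEV-ES DebugSwap: %s\n", c == 'Y' ? "enabled" : "disabled");
	fclose(f);
	return 0;
}
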
+112 −67
@@ -203,7 +203,7 @@ static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable/disable Next RIP Save */
static int nrips = true;
int nrips = true;
module_param(nrips, int, 0444);

/* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +365,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;

}
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
					void *insn, int insn_len);

static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
					   bool commit_side_effects)
@@ -385,6 +387,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
	}

	if (!svm->next_rip) {
		/*
		 * FIXME: Drop this when kvm_emulate_instruction() does the
		 * right thing and treats "can't emulate" as outright failure
		 * for EMULTYPE_SKIP.
		 */
		if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
			return 0;

		if (unlikely(!commit_side_effects))
			old_rflags = svm->vmcb->save.rflags;

@@ -677,6 +687,39 @@ static int svm_cpu_init(int cpu)

}

static void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);

	recalc_intercepts(svm);
}

static void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.intercepts[INTERCEPT_DR] = 0;

	recalc_intercepts(svm);
}

static int direct_access_msr_slot(u32 msr)
{
	u32 i;
@@ -947,50 +990,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
		svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}

static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
	/*
	 * If the LBR virtualization is disabled, the LBR msrs are always
	 * kept in the vmcb01 to avoid copying them on nested guest entries.
	 *
	 * If nested, and the LBR virtualization is enabled/disabled, the msrs
	 * are moved between the vmcb01 and vmcb02 as needed.
	 * If LBR virtualization is disabled, the LBR MSRs are always kept in
	 * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
	 * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
	 */
	struct vmcb *vmcb =
		(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
			svm->vmcb : svm->vmcb01.ptr;

	switch (index) {
	case MSR_IA32_DEBUGCTLMSR:
		return vmcb->save.dbgctl;
	case MSR_IA32_LASTBRANCHFROMIP:
		return vmcb->save.br_from;
	case MSR_IA32_LASTBRANCHTOIP:
		return vmcb->save.br_to;
	case MSR_IA32_LASTINTFROMIP:
		return vmcb->save.last_excp_from;
	case MSR_IA32_LASTINTTOIP:
		return vmcb->save.last_excp_to;
	default:
		KVM_BUG(false, svm->vcpu.kvm,
			"%s: Unknown MSR 0x%x", __func__, index);
		return 0;
	}
	return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
								   svm->vmcb01.ptr;
}

void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
					   DEBUGCTLMSR_LBR;

	bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
				      LBR_CTL_ENABLE_MASK);

	if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
		if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
			enable_lbrv = true;
	bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
	bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
			   (is_guest_mode(vcpu) && svm->lbrv_enabled &&
			    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));

	if (enable_lbrv == current_enable_lbrv)
		return;
@@ -1201,10 +1218,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
	 * Guest access to VMware backdoor ports could legitimately
	 * trigger #GP because of TSS I/O permission bitmap.
	 * We intercept those #GP and allow access to them anyway
	 * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
	 * decrypt guest memory to decode the faulting instruction.
	 * as VMware does.
	 */
	if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
	if (enable_vmware_backdoor)
		set_exception_intercept(svm, GP_VECTOR);

	svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +1965,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
	if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
		return;

	get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,11 +2526,12 @@ static int iret_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON_ONCE(sev_es_guest(vcpu->kvm));

	++vcpu->stat.nmi_window_exits;
	svm->awaiting_iret_completion = true;

	svm_clr_iret_intercept(svm);
	if (!sev_es_guest(vcpu->kvm))
	svm->nmi_iret_rip = kvm_rip_read(vcpu);

	kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2680,6 +2697,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
	unsigned long val;
	int err = 0;

	/*
	 * SEV-ES intercepts DR7 only to disable guest debugging, and the guest issues a VMGEXIT
	 * for DR7 writes only. KVM cannot change DR7 (it is always swapped as Type-A), so return early.
	 */
	if (sev_es_guest(vcpu->kvm))
		return 1;

	if (vcpu->guest_debug == 0) {
		/*
		 * No more DR vmexits; force a reload of the debug registers
@@ -2802,11 +2826,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		msr_info->data = svm->tsc_aux;
		break;
	case MSR_IA32_DEBUGCTLMSR:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
		break;
	case MSR_IA32_LASTBRANCHFROMIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
		break;
	case MSR_IA32_LASTBRANCHTOIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
		break;
	case MSR_IA32_LASTINTFROMIP:
		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
		break;
	case MSR_IA32_LASTINTTOIP:
		msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
		msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
		break;
	case MSR_VM_HSAVE_PA:
		msr_info->data = svm->nested.hsave_msr;
@@ -3037,13 +3069,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
		if (data & DEBUGCTL_RESERVED_BITS)
			return 1;

		if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
			svm->vmcb->save.dbgctl = data;
		else
			svm->vmcb01.ptr->save.dbgctl = data;

		svm_get_lbr_vmcb(svm)->save.dbgctl = data;
		svm_update_lbrv(vcpu);

		break;
	case MSR_VM_HSAVE_PA:
		/*
@@ -3769,6 +3796,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
	if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
		return; /* IRET will cause a vm exit */

	/*
	 * SEV-ES guests are responsible for signaling when a vCPU is ready to
	 * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
	 * KVM can't intercept and single-step IRET to detect when NMIs are
	 * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
	 *
	 * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
	 * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
	 * supported NAEs in the GHCB protocol.
	 */
	if (sev_es_guest(vcpu->kvm))
		return;

	if (!gif_set(svm)) {
		if (vgif)
			svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3958,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
	svm->soft_int_injected = false;

	/*
	 * If we've made progress since setting HF_IRET_MASK, we've
	 * If we've made progress since setting awaiting_iret_completion, we've
	 * executed an IRET and can allow NMI injection.
	 */
	if (svm->awaiting_iret_completion &&
	    (sev_es_guest(vcpu->kvm) ||
	     kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
	    kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
		svm->awaiting_iret_completion = false;
		svm->nmi_masked = false;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4651,15 +4690,24 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
	 * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
	 * decode garbage.
	 *
	 * Inject #UD if KVM reached this point without an instruction buffer.
	 * In practice, this path should never be hit by a well-behaved guest,
	 * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
	 * is still theoretically reachable, e.g. via unaccelerated fault-like
	 * AVIC access, and needs to be handled by KVM to avoid putting the
	 * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
	 * but its the least awful option given lack of insight into the guest.
	 * If KVM is NOT trying to simply skip an instruction, inject #UD if
	 * KVM reached this point without an instruction buffer.  In practice,
	 * this path should never be hit by a well-behaved guest, e.g. KVM
	 * doesn't intercept #UD or #GP for SEV guests, but this path is still
	 * theoretically reachable, e.g. via unaccelerated fault-like AVIC
	 * access, and needs to be handled by KVM to avoid putting the guest
	 * into an infinite loop.  Injecting #UD is somewhat arbitrary, but
	 * it's the least awful option given the lack of insight into the guest.
	 *
	 * If KVM is trying to skip an instruction, simply resume the guest.
	 * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
	 * will attempt to re-inject the INT3/INTO and skip the instruction.
	 * In that scenario, retrying the INT3/INTO and hoping the guest will
	 * make forward progress is the only option that has a chance of
	 * success (and in practice it will work the vast majority of the time).
	 */
	if (unlikely(!insn)) {
		if (!(emul_type & EMULTYPE_SKIP))
			kvm_queue_exception(vcpu, UD_VECTOR);
		return false;
	}
@@ -5112,9 +5160,11 @@ static __init int svm_hardware_setup(void)

	svm_adjust_mmio_mask();

	nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);

	/*
	 * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
	 * may be modified by svm_adjust_mmio_mask()).
	 * may be modified by svm_adjust_mmio_mask()), as well as nrips.
	 */
	sev_hardware_setup();

@@ -5126,11 +5176,6 @@ static __init int svm_hardware_setup(void)
			goto err;
	}

	if (nrips) {
		if (!boot_cpu_has(X86_FEATURE_NRIPS))
			nrips = false;
	}

	enable_apicv = avic = avic && avic_hardware_setup();

	if (!enable_apicv) {
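
One detail implied but not shown in this excerpt: nrips loses its static qualifier in svm.c so that sev_hardware_setup() can consume it, which presumably comes with a matching declaration in the SVM header. A hypothetical sketch of that companion line:

/* svm.h (presumed companion change, not visible in this excerpt) */
extern int nrips;
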