Commit 54744e17 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini
Browse files

KVM: SVM: Move svm_hardware_setup() and its helpers below svm_x86_ops



Move svm_hardware_setup() below svm_x86_ops so that KVM can modify ops
during setup, e.g. the vcpu_(un)blocking hooks can be nullified if AVIC
is disabled or unsupported.

No functional change intended.

Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Message-Id: <20211208015236.1616697-23-seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 935a7333
Loading
Loading
Loading
Loading
+234 −233
Original line number Diff line number Diff line
@@ -869,47 +869,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
	}
}

/*
 * The default MMIO mask is a single bit (excluding the present bit),
 * which could conflict with the memory encryption bit. Check for
 * memory encryption support and override the default MMIO mask if
 * memory encryption is enabled.
 */
static __init void svm_adjust_mmio_mask(void)
{
	unsigned int enc_bit, mask_bit;
	u64 msr, mask;

	/* If there is no memory encryption support, use existing mask */
	if (cpuid_eax(0x80000000) < 0x8000001f)
		return;

	/* If memory encryption is not enabled, use existing mask */
	rdmsrl(MSR_AMD64_SYSCFG, msr);
	if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
		return;

	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
	mask_bit = boot_cpu_data.x86_phys_bits;

	/* Increment the mask bit if it is the same as the encryption bit */
	if (enc_bit == mask_bit)
		mask_bit++;

	/*
	 * If the mask bit location is below 52, then some bits above the
	 * physical addressing limit will always be reserved, so use the
	 * rsvd_bits() function to generate the mask. This mask, along with
	 * the present bit, will be used to generate a page fault with
	 * PFER.RSV = 1.
	 *
	 * If the mask bit location is 52 (or above), then clear the mask.
	 */
	mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;

	kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}

static void svm_hardware_teardown(void)
{
	int cpu;
@@ -924,198 +883,6 @@ static void svm_hardware_teardown(void)
	iopm_base = 0;
}

static __init void svm_set_cpu_caps(void)
{
	kvm_set_cpu_caps();

	supported_xss = 0;

	/* CPUID 0x80000001 and 0x8000000A (SVM features) */
	if (nested) {
		kvm_cpu_cap_set(X86_FEATURE_SVM);

		if (nrips)
			kvm_cpu_cap_set(X86_FEATURE_NRIPS);

		if (npt_enabled)
			kvm_cpu_cap_set(X86_FEATURE_NPT);

		if (tsc_scaling)
			kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);

		/* Nested VM can receive #VMEXIT instead of triggering #GP */
		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
	}

	/* CPUID 0x80000008 */
	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
	    boot_cpu_has(X86_FEATURE_AMD_SSBD))
		kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);

	/* AMD PMU PERFCTR_CORE CPUID */
	if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);

	/* CPUID 0x8000001F (SME/SEV features) */
	sev_set_cpu_caps();
}

static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;
	unsigned int order = get_order(IOPM_SIZE);

	/*
	 * NX is required for shadow paging and for NPT if the NX huge pages
	 * mitigation is enabled.
	 */
	if (!boot_cpu_has(X86_FEATURE_NX)) {
		pr_err_ratelimited("NX (Execute Disable) not supported\n");
		return -EOPNOTSUPP;
	}
	kvm_enable_efer_bits(EFER_NX);

	iopm_pages = alloc_pages(GFP_KERNEL, order);

	if (!iopm_pages)
		return -ENOMEM;

	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (tsc_scaling) {
		if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
			tsc_scaling = false;
		} else {
			pr_info("TSC scaling supported\n");
			kvm_has_tsc_control = true;
			kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
			kvm_tsc_scaling_ratio_frac_bits = 32;
		}
	}

	tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);

	/* Check for pause filtering support */
	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
		pause_filter_count = 0;
		pause_filter_thresh = 0;
	} else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
		pause_filter_thresh = 0;
	}

	if (nested) {
		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
	}

	/*
	 * KVM's MMU doesn't support using 2-level paging for itself, and thus
	 * NPT isn't supported if the host is using 2-level paging since host
	 * CR4 is unchanged on VMRUN.
	 */
	if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
		npt_enabled = false;

	if (!boot_cpu_has(X86_FEATURE_NPT))
		npt_enabled = false;

	/* Force VM NPT level equal to the host's paging level */
	kvm_configure_mmu(npt_enabled, get_npt_level(),
			  get_npt_level(), PG_LEVEL_1G);
	pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");

	/* Note, SEV setup consumes npt_enabled. */
	sev_hardware_setup();

	svm_hv_hardware_setup();

	svm_adjust_mmio_mask();

	for_each_possible_cpu(cpu) {
		r = svm_cpu_init(cpu);
		if (r)
			goto err;
	}

	if (nrips) {
		if (!boot_cpu_has(X86_FEATURE_NRIPS))
			nrips = false;
	}

	enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);

	if (enable_apicv) {
		pr_info("AVIC enabled\n");

		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
	}

	if (vls) {
		if (!npt_enabled ||
		    !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
		    !IS_ENABLED(CONFIG_X86_64)) {
			vls = false;
		} else {
			pr_info("Virtual VMLOAD VMSAVE supported\n");
		}
	}

	if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
		svm_gp_erratum_intercept = false;

	if (vgif) {
		if (!boot_cpu_has(X86_FEATURE_VGIF))
			vgif = false;
		else
			pr_info("Virtual GIF supported\n");
	}

	if (lbrv) {
		if (!boot_cpu_has(X86_FEATURE_LBRV))
			lbrv = false;
		else
			pr_info("LBR virtualization supported\n");
	}

	if (!enable_pmu)
		pr_info("PMU virtualization is disabled\n");

	svm_set_cpu_caps();

	/*
	 * It seems that on AMD processors PTE's accessed bit is
	 * being set by the CPU hardware before the NPF vmexit.
	 * This is not expected behaviour and our tests fail because
	 * of it.
	 * A workaround here is to disable support for
	 * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
	 * In this case userspace can know if there is support using
	 * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
	 * it
	 * If future AMD CPU models change the behaviour described above,
	 * this variable can be changed accordingly
	 */
	allow_smaller_maxphyaddr = !npt_enabled;

	return 0;

err:
	svm_hardware_teardown();
	return r;
}

static void init_seg(struct vmcb_seg *seg)
{
	seg->selector = 0;
@@ -4738,6 +4505,240 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};

/*
 * The default MMIO mask is a single bit (excluding the present bit),
 * which could conflict with the memory encryption bit. Check for
 * memory encryption support and override the default MMIO mask if
 * memory encryption is enabled.
 */
static __init void svm_adjust_mmio_mask(void)
{
	unsigned int enc_bit, mask_bit;
	u64 msr, mask;

	/* If there is no memory encryption support, use existing mask */
	if (cpuid_eax(0x80000000) < 0x8000001f)
		return;

	/* If memory encryption is not enabled, use existing mask */
	rdmsrl(MSR_AMD64_SYSCFG, msr);
	if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
		return;

	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
	mask_bit = boot_cpu_data.x86_phys_bits;

	/* Increment the mask bit if it is the same as the encryption bit */
	if (enc_bit == mask_bit)
		mask_bit++;

	/*
	 * If the mask bit location is below 52, then some bits above the
	 * physical addressing limit will always be reserved, so use the
	 * rsvd_bits() function to generate the mask. This mask, along with
	 * the present bit, will be used to generate a page fault with
	 * PFER.RSV = 1.
	 *
	 * If the mask bit location is 52 (or above), then clear the mask.
	 */
	mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;

	kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}

static __init void svm_set_cpu_caps(void)
{
	kvm_set_cpu_caps();

	supported_xss = 0;

	/* CPUID 0x80000001 and 0x8000000A (SVM features) */
	if (nested) {
		kvm_cpu_cap_set(X86_FEATURE_SVM);

		if (nrips)
			kvm_cpu_cap_set(X86_FEATURE_NRIPS);

		if (npt_enabled)
			kvm_cpu_cap_set(X86_FEATURE_NPT);

		if (tsc_scaling)
			kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);

		/* Nested VM can receive #VMEXIT instead of triggering #GP */
		kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
	}

	/* CPUID 0x80000008 */
	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
	    boot_cpu_has(X86_FEATURE_AMD_SSBD))
		kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);

	/* AMD PMU PERFCTR_CORE CPUID */
	if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);

	/* CPUID 0x8000001F (SME/SEV features) */
	sev_set_cpu_caps();
}

static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;
	unsigned int order = get_order(IOPM_SIZE);

	/*
	 * NX is required for shadow paging and for NPT if the NX huge pages
	 * mitigation is enabled.
	 */
	if (!boot_cpu_has(X86_FEATURE_NX)) {
		pr_err_ratelimited("NX (Execute Disable) not supported\n");
		return -EOPNOTSUPP;
	}
	kvm_enable_efer_bits(EFER_NX);

	iopm_pages = alloc_pages(GFP_KERNEL, order);

	if (!iopm_pages)
		return -ENOMEM;

	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (tsc_scaling) {
		if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
			tsc_scaling = false;
		} else {
			pr_info("TSC scaling supported\n");
			kvm_has_tsc_control = true;
			kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
			kvm_tsc_scaling_ratio_frac_bits = 32;
		}
	}

	tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);

	/* Check for pause filtering support */
	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
		pause_filter_count = 0;
		pause_filter_thresh = 0;
	} else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
		pause_filter_thresh = 0;
	}

	if (nested) {
		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
	}

	/*
	 * KVM's MMU doesn't support using 2-level paging for itself, and thus
	 * NPT isn't supported if the host is using 2-level paging since host
	 * CR4 is unchanged on VMRUN.
	 */
	if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
		npt_enabled = false;

	if (!boot_cpu_has(X86_FEATURE_NPT))
		npt_enabled = false;

	/* Force VM NPT level equal to the host's paging level */
	kvm_configure_mmu(npt_enabled, get_npt_level(),
			  get_npt_level(), PG_LEVEL_1G);
	pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");

	/* Note, SEV setup consumes npt_enabled. */
	sev_hardware_setup();

	svm_hv_hardware_setup();

	svm_adjust_mmio_mask();

	for_each_possible_cpu(cpu) {
		r = svm_cpu_init(cpu);
		if (r)
			goto err;
	}

	if (nrips) {
		if (!boot_cpu_has(X86_FEATURE_NRIPS))
			nrips = false;
	}

	enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);

	if (enable_apicv) {
		pr_info("AVIC enabled\n");

		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
	}

	if (vls) {
		if (!npt_enabled ||
		    !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
		    !IS_ENABLED(CONFIG_X86_64)) {
			vls = false;
		} else {
			pr_info("Virtual VMLOAD VMSAVE supported\n");
		}
	}

	if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
		svm_gp_erratum_intercept = false;

	if (vgif) {
		if (!boot_cpu_has(X86_FEATURE_VGIF))
			vgif = false;
		else
			pr_info("Virtual GIF supported\n");
	}

	if (lbrv) {
		if (!boot_cpu_has(X86_FEATURE_LBRV))
			lbrv = false;
		else
			pr_info("LBR virtualization supported\n");
	}

	if (!enable_pmu)
		pr_info("PMU virtualization is disabled\n");

	svm_set_cpu_caps();

	/*
	 * It seems that on AMD processors PTE's accessed bit is
	 * being set by the CPU hardware before the NPF vmexit.
	 * This is not expected behaviour and our tests fail because
	 * of it.
	 * A workaround here is to disable support for
	 * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
	 * In this case userspace can know if there is support using
	 * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
	 * it
	 * If future AMD CPU models change the behaviour described above,
	 * this variable can be changed accordingly
	 */
	allow_smaller_maxphyaddr = !npt_enabled;

	return 0;

err:
	svm_hardware_teardown();
	return r;
}


static struct kvm_x86_init_ops svm_init_ops __initdata = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,