Commit 751d77fe authored by Paolo Bonzini

Merge tag 'kvm-x86-pmu-6.5' of https://github.com/kvm-x86/linux into HEAD

KVM x86/pmu changes for 6.5:

 - Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes
   included along the way
parents 88de4b94 94cdeebd
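
Background for the diffs that follow: PerfMonV2 adds Intel-style global enable/status MSRs on top of AMD's existing per-counter PERF_CTL/PERF_CTR pairs, and guests discover it via CPUID leaf 0x80000022. The sketch below shows roughly how a guest is expected to drive the new MSRs once KVM exposes them; guest_rdmsr()/guest_wrmsr() and the event-select value are hypothetical placeholders, while the MSR indices follow msr-index.h and the AMD PPR.

/*
 * Hedged guest-side sketch, not KVM code.  guest_rdmsr()/guest_wrmsr() are
 * hypothetical ring-0 helpers; only the MSR numbers come from msr-index.h.
 */
#define MSR_F15H_PERF_CTL0			0xc0010200
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302

extern unsigned long long guest_rdmsr(unsigned int msr);		/* hypothetical */
extern void guest_wrmsr(unsigned int msr, unsigned long long val);	/* hypothetical */

static void count_on_pmc0(unsigned long long evtsel)
{
	/* Program counter 0 as before (event select + per-counter enable)... */
	guest_wrmsr(MSR_F15H_PERF_CTL0, evtsel);
	/* ...but with PerfMonV2 it only counts once its global bit is set too. */
	guest_wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 1ULL << 0);

	/* ... run the workload ... */

	/* Overflow is reported in GLOBAL_STATUS and acknowledged through
	 * GLOBAL_STATUS_CLR, the MSRs kvm_pmu_get_msr()/kvm_pmu_set_msr()
	 * start emulating in this merge. */
	if (guest_rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS) & (1ULL << 0))
		guest_wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 1ULL << 0);
}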
arch/x86/include/asm/kvm-x86-pmu-ops.h  +0 −1
@@ -13,7 +13,6 @@ BUILD_BUG_ON(1)
 * at the call sites.
 */
KVM_X86_PMU_OP(hw_event_available)
-KVM_X86_PMU_OP(pmc_is_enabled)
KVM_X86_PMU_OP(pmc_idx_to_pmc)
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
KVM_X86_PMU_OP(msr_idx_to_pmc)
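
Side note on the deletion above: this header is expanded at each include site, so dropping pmc_is_enabled here is what makes the kvm_x86_pmu_pmc_is_enabled static call disappear (its wrapper in pmu.c and the pmc_is_enabled hook in kvm_pmu_ops are removed further down, with the check reimplemented as pmc_is_globally_enabled() in pmu.h). The consumer pattern is roughly the sketch below; the exact macro bodies live in arch/x86/kvm/pmu.c, this is only their shape.

/* Approximate include-site expansion (shape only, not verbatim kernel code):
 * each KVM_X86_PMU_OP() line in the header becomes one static-call slot. */
#define KVM_X86_PMU_OP(func)						\
	DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,			\
				*(((struct kvm_pmu_ops *)0)->func))
#include <asm/kvm-x86-pmu-ops.h>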
arch/x86/include/asm/kvm_host.h  +1 −1
@@ -523,7 +523,7 @@ struct kvm_pmu {
	u64 global_status;
	u64 counter_bitmask[2];
	u64 global_ctrl_mask;
-	u64 global_ovf_ctrl_mask;
+	u64 global_status_mask;
	u64 reserved_bits;
	u64 raw_event_mask;
	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
arch/x86/kvm/cpuid.c  +28 −2
@@ -729,6 +729,10 @@ void kvm_set_cpu_caps(void)
		F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
	);

	kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
		F(PERFMON_V2)
	);

	/*
	 * Synthesize "LFENCE is serializing" into the AMD-defined entry in
	 * KVM's supported CPUID if the feature is reported as supported by the
@@ -943,7 +947,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
		union cpuid10_eax eax;
		union cpuid10_edx edx;

-		if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
+		if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}
@@ -1123,7 +1127,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
		entry->edx = 0;
		break;
	case 0x80000000:
-		entry->eax = min(entry->eax, 0x80000021);
+		entry->eax = min(entry->eax, 0x80000022);
		/*
		 * Serializing LFENCE is reported in a multitude of ways, and
		 * NullSegClearsBase is not reported in CPUID on Zen2; help
@@ -1228,6 +1232,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
		entry->ebx = entry->ecx = entry->edx = 0;
		cpuid_entry_override(entry, CPUID_8000_0021_EAX);
		break;
	/* AMD Extended Performance Monitoring and Debug */
	case 0x80000022: {
		union cpuid_0x80000022_ebx ebx;

		entry->ecx = entry->edx = 0;
		if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
			entry->eax = entry->ebx = 0;
			break;
		}

		cpuid_entry_override(entry, CPUID_8000_0022_EAX);

		if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
			ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
		else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
			ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
		else
			ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;

		entry->ebx = ebx.full;
		break;
	}
	/*Add support for Centaur's CPUID instruction*/
	case 0xC0000000:
		/*Just support up to 0xC0000004 now*/
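
The new 0x80000022 case above is what a guest (or anything else issuing CPUID) ends up seeing: EAX carries the PerfMonV2 feature bit via cpuid_entry_override(), and EBX holds the core-counter count taken from kvm_pmu_cap or the AMD64_NUM_COUNTERS* fallbacks. A quick userspace check of that view, assuming the PPR bit layout (EAX[0] = PerfMonV2, EBX[3:0] = NumCorePmc):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid_count() returns 0 if the leaf is out of range. */
	if (!__get_cpuid_count(0x80000022, 0, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID 0x80000022 not reported");
		return 1;
	}

	printf("PerfMonV2: %u, core PMCs: %u\n", eax & 1, ebx & 0xf);
	return 0;
}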
arch/x86/kvm/pmu.c  +84 −8
@@ -93,11 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}

-static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
-{
-	return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
-}

static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
@@ -562,6 +557,14 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		return kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu));
	default:
		break;
	}
	return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
		static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
}
@@ -577,15 +580,88 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u32 msr = msr_info->index;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
		msr_info->data = pmu->global_status;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
	case MSR_CORE_PERF_GLOBAL_CTRL:
		msr_info->data = pmu->global_ctrl;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		msr_info->data = 0;
		break;
	default:
		return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
	}

	return 0;
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
	u64 diff;

	/*
	 * Note, AMD ignores writes to reserved bits and read-only PMU MSRs,
	 * whereas Intel generates #GP on attempts to write reserved/RO MSRs.
	 */
	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_STATUS:
		if (!msr_info->host_initiated)
			return 1; /* RO MSR */
		fallthrough;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
		/* Per PPR, Read-only MSR. Writes are ignored. */
		if (!msr_info->host_initiated)
			break;

		if (data & pmu->global_status_mask)
			return 1;

		pmu->global_status = data;
		break;
	case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
		data &= ~pmu->global_ctrl_mask;
		fallthrough;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		if (!kvm_valid_perf_global_ctrl(pmu, data))
			return 1;

		if (pmu->global_ctrl != data) {
			diff = pmu->global_ctrl ^ data;
			pmu->global_ctrl = data;
			reprogram_counters(pmu, diff);
		}
		break;
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		/*
		 * GLOBAL_OVF_CTRL, a.k.a. GLOBAL STATUS_RESET, clears bits in
		 * GLOBAL_STATUS, and so the set of reserved bits is the same.
		 */
		if (data & pmu->global_status_mask)
			return 1;
		fallthrough;
	case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
		if (!msr_info->host_initiated)
			pmu->global_status &= ~data;
		break;
	default:
		kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
		return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
	}

	return 0;
}

/* refresh PMU settings. This function generally is called when underlying
 * settings are changed (such as changes of PMU CPUID by guest VMs), which
 * should rarely happen.
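
The host_initiated checks above distinguish guest WRMSR emulation from MSR accesses made by the VMM through the KVM_GET_MSRS/KVM_SET_MSRS ioctls; that is why userspace can still restore the otherwise read-only MSR_CORE_PERF_GLOBAL_STATUS, e.g. during migration. A minimal host-side sketch of such a write, assuming vcpu_fd is an existing vCPU file descriptor:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e

/* Returns the number of MSRs set (1 on success), as KVM_SET_MSRS does. */
static int restore_global_status(int vcpu_fd, unsigned long long status)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs;

	memset(&msrs, 0, sizeof(msrs));
	msrs.hdr.nmsrs = 1;
	msrs.entry.index = MSR_CORE_PERF_GLOBAL_STATUS;
	msrs.entry.data = status;

	return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);
}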
arch/x86/kvm/pmu.h  +51 −5
@@ -20,7 +20,6 @@

struct kvm_pmu_ops {
	bool (*hw_event_available)(struct kvm_pmc *pmc);
-	bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
	struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
	struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
		unsigned int idx, u64 *mask);
@@ -37,10 +36,25 @@ struct kvm_pmu_ops {

	const u64 EVENTSEL_EVENT;
	const int MAX_NR_GP_COUNTERS;
	const int MIN_NR_GP_COUNTERS;
};

void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);

static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
{
	/*
	 * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
	 * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
	 * greater than zero.  However, KVM only exposes and emulates the MSR
	 * to/for the guest if the guest PMU supports at least "Architectural
	 * Performance Monitoring Version 2".
	 *
	 * AMD's version of PERF_GLOBAL_CTRL conveniently shows up with v2.
	 */
	return pmu->version > 1;
}

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -161,6 +175,7 @@ extern struct x86_pmu_capability kvm_pmu_cap;
static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
{
	bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
	int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;

	/*
	 * Hybrid PMUs don't play nice with virtualization without careful
@@ -175,11 +190,15 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
		perf_get_x86_pmu_capability(&kvm_pmu_cap);

		/*
-		 * For Intel, only support guest architectural pmu
-		 * on a host with architectural pmu.
+		 * WARN if perf did NOT disable hardware PMU if the number of
+		 * architecturally required GP counters aren't present, i.e. if
+		 * there are a non-zero number of counters, but fewer than what
+		 * is architecturally required.
		 */
-		if ((is_intel && !kvm_pmu_cap.version) ||
-		    !kvm_pmu_cap.num_counters_gp)
+		if (!kvm_pmu_cap.num_counters_gp ||
+		    WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs))
			enable_pmu = false;
+		else if (is_intel && !kvm_pmu_cap.version)
+			enable_pmu = false;
	}

@@ -201,6 +220,33 @@ static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}

static inline void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
	int bit;

	if (!diff)
		return;

	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
		set_bit(bit, pmu->reprogram_pmi);
	kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
}

/*
 * Check if a PMC is enabled by comparing it against global_ctrl bits.
 *
 * If the vPMU doesn't have global_ctrl MSR, all vPMCs are enabled.
 */
static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!kvm_pmu_has_perf_global_ctrl(pmu))
		return true;

	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
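
Taken together, kvm_pmu_has_perf_global_ctrl() and pmc_is_globally_enabled() encode a simple rule: a version-1 vPMU has no PERF_GLOBAL_CTRL, so every counter is treated as globally enabled, while from version 2 on a counter only counts when its bit is set in global_ctrl. A standalone toy model of just that rule (not KVM code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_pmu {
	unsigned int version;
	uint64_t global_ctrl;
};

static bool toy_pmc_is_globally_enabled(const struct toy_pmu *pmu, int idx)
{
	if (pmu->version <= 1)		/* no PERF_GLOBAL_CTRL exposed */
		return true;
	return pmu->global_ctrl & (1ULL << idx);
}

int main(void)
{
	struct toy_pmu v1 = { .version = 1, .global_ctrl = 0 };
	struct toy_pmu v2 = { .version = 2, .global_ctrl = 0x1 };

	printf("v1 pmc3 enabled: %d\n", toy_pmc_is_globally_enabled(&v1, 3)); /* 1 */
	printf("v2 pmc3 enabled: %d\n", toy_pmc_is_globally_enabled(&v2, 3)); /* 0 */
	return 0;
}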