Unverified Commit 1de0ecd5 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4048 [OLK-5.10] Intel: Backport PEBS format 5 support to OLK-5.10 for GNR/SRF Timed PEBS enabling

Merge Pull Request from: @yunyingsun 
 
Title: Intel: Backport PEBS format 5 support to OLK-5.10 for GNR/SRF Timed PEBS enabling

Content:
To support the new feature "Timed PEBS", which will be supported starting with the upcoming Intel Xeon platforms Granite Rapids (GNR) and Sierra Forest (SRF), PEBS format 5 is a pre-requisite.

PEBS format 5 support has been included in mainline kernel since kernel v5.18-rc1:
ee28855a perf/x86/intel: Increase max number of the fixed counters
0144ba0c KVM: x86: use the KVM side max supported fixed counter
2145e77f perf/x86/intel: Enable PEBS format 5

LKML: https://lore.kernel.org/lkml/1643750603-100733-1-git-send-email-kan.liang@linux.intel.com/

Since the Timed PEBS kernel commit:
(v6.3-rc1) c87a3109 perf/x86: Support Retire Latency
has been included and merged along with the SRF core PMU PR:
https://gitee.com/openeuler/kernel/pulls/3689

and besides that, we won't backport the user-space perf tool patches needed for Timed PEBS (to avoid introducing too many dependencies):
4e846311 perf script: Fix missing Retire Latency fields option documentation
957ed139 perf event x86: Add retire_lat when synthesizing PERF_SAMPLE_WEIGHT_STRUCT
e65f91b2 perf test x86: Support the retire_lat (Retire Latency) sample_type check
17f248aa perf script: Support Retire Latency
d7d213e0 perf report: Support Retire Latency

so the patches enabling PEBS format 5 are all that is needed for Timed PEBS support on OLK-5.10.

Note 1: one dependent commit has been identified:
(v5.14-rc1) 4c58d922 perf/x86/intel: Fix PEBS-via-PT reload base value for Extended PEBS
which is needed by (2145e77f perf/x86/intel: Enable PEBS format 5).

Note 2: this PR depends on https://gitee.com/openeuler/kernel/pulls/3689.

Intel-kernel issue:
https://gitee.com/openeuler/intel-kernel/issues/I8WXIM

Test:
We've verified on Intel internal GNR/SRF platforms that both PEBS and timed PEBS work fine with this PR.

1. PEBS format
Without this PR, on GNR/SRF, with (# dmesg | grep -i "performance events") it returns:
[ 0.557984] Performance Events: XSAVE Architectural LBR, no PEBS fmt5+ , AnyThread deprecated, Crestmont events, 32-deep LBR, full-width counters, Intel PMU driver.
With this PR, on GNR/SRF, with (# dmesg | grep -i "performance events") it returns:
[ 0.611077] Performance Events: XSAVE Architectural LBR, PEBS fmt4+-baseline, PEBS-via-PT , AnyThread deprecated, Crestmont events, 32-deep LBR, full-width counters, Intel PMU driver.

2. Timed PEBS
Without this PR, on GNR/SRF there's no "mem-loads"/"mem-stores" event available with "perf list".
With this PR, on GNR/SRF:
a. "mem-loads"/"mem-stores" events are available with "perf list".
b. Timed PEBS works, as there is non-zero data in the last (third) column of the output of the second command:
`# perf record -e cpu/mem-loads,ldlat=3/P --weight -d`
`# perf report -D -i perf.data | grep weight`

Known issue:
N/A

Default config change:
N/A 
 
Link:https://gitee.com/openeuler/kernel/pulls/4048

 

Reviewed-by: default avatarXu Kuohai <xukuohai@huawei.com>
Reviewed-by: default avatarJason Zeng <jason.zeng@intel.com>
Reviewed-by: default avatarAichun Shi <aichun.shi@intel.com>
Signed-off-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
parents 15b9b995 80498dfe
Loading
Loading
Loading
Loading
+39 −1
Original line number Diff line number Diff line
@@ -181,6 +181,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
	EVENT_CONSTRAINT_END
};

/*
 * Generic event constraints for architectural perfmon v5+, which can
 * enumerate up to 16 fixed counters.  Fixed counters 4-15 have no
 * architecturally named event; they use the pseudo-encoding where the
 * umask equals the fixed counter index + 1 (e.g. 0x0500 -> counter 4).
 * At init time the table is truncated to the CPUID-enumerated number of
 * fixed counters by overwriting the first unused slot's weight with -1
 * (i.e. inserting an EVENT_CONSTRAINT_END).
 */
static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
	/* Fixed counters 4-15: pseudo-encoded, umask = counter index + 1 */
	FIXED_EVENT_CONSTRAINT(0x0500, 4),
	FIXED_EVENT_CONSTRAINT(0x0600, 5),
	FIXED_EVENT_CONSTRAINT(0x0700, 6),
	FIXED_EVENT_CONSTRAINT(0x0800, 7),
	FIXED_EVENT_CONSTRAINT(0x0900, 8),
	FIXED_EVENT_CONSTRAINT(0x0a00, 9),
	FIXED_EVENT_CONSTRAINT(0x0b00, 10),
	FIXED_EVENT_CONSTRAINT(0x0c00, 11),
	FIXED_EVENT_CONSTRAINT(0x0d00, 12),
	FIXED_EVENT_CONSTRAINT(0x0e00, 13),
	FIXED_EVENT_CONSTRAINT(0x0f00, 14),
	FIXED_EVENT_CONSTRAINT(0x1000, 15),
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -6696,7 +6717,9 @@ __init int intel_pmu_init(void)
			pr_cont("generic architected perfmon v1, ");
			name = "generic_arch_v1";
			break;
		default:
		case 2:
		case 3:
		case 4:
			/*
			 * default constraints for v2 and up
			 */
@@ -6704,6 +6727,21 @@ __init int intel_pmu_init(void)
			pr_cont("generic architected perfmon, ");
			name = "generic_arch_v2+";
			break;
		default:
			/*
			 * The default constraints for v5 and up can support up to
			 * 16 fixed counters. For the fixed counters 4 and later,
			 * the pseudo-encoding is applied.
			 * The constraints may be cut according to the CPUID enumeration
			 * by inserting the EVENT_CONSTRAINT_END.
			 */
			if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
				x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
			intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
			x86_pmu.event_constraints = intel_v5_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			name = "generic_arch_v5+";
			break;
		}
	}

+22 −6
Original line number Diff line number Diff line
@@ -1295,6 +1295,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;
	u64 value = ds->pebs_event_reset[hwc->idx];
	u32 base = MSR_RELOAD_PMC0;
	unsigned int idx = hwc->idx;

	if (!is_pebs_pt(event))
		return;
@@ -1304,7 +1307,15 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)

	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;

	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
	if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
		base = MSR_RELOAD_FIXED_CTR0;
		idx = hwc->idx - INTEL_PMC_IDX_FIXED;
		if (x86_pmu.intel_cap.pebs_format < 5)
			value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
		else
			value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
	}
	wrmsrl(base + idx, value);
}

void intel_pmu_pebs_enable(struct perf_event *event)
@@ -1312,6 +1323,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;
	unsigned int idx = hwc->idx;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

@@ -1330,19 +1342,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
		}
	}

	if (idx >= INTEL_PMC_IDX_FIXED) {
		if (x86_pmu.intel_cap.pebs_format < 5)
			idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
		else
			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
	}

	/*
	 * Use auto-reload if possible to save a MSR write in the PMI.
	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
	 */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		unsigned int idx = hwc->idx;

		if (idx >= INTEL_PMC_IDX_FIXED)
			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
		ds->pebs_event_reset[idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	} else {
		ds->pebs_event_reset[hwc->idx] = 0;
		ds->pebs_event_reset[idx] = 0;
	}

	intel_pmu_pebs_via_pt_enable(event);
@@ -2306,6 +2321,7 @@ void __init intel_ds_init(void)
			break;

		case 4:
		case 5:
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
			if (x86_pmu.intel_cap.pebs_baseline) {
+3 −2
Original line number Diff line number Diff line
@@ -7,8 +7,9 @@
#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		8
#define MAX_FIXED_PEBS_EVENTS	4
#define MAX_PEBS_EVENTS_FMT4	8
#define MAX_PEBS_EVENTS		32
#define MAX_FIXED_PEBS_EVENTS	16

/*
 * A debug store configuration.
+2 −1
Original line number Diff line number Diff line
@@ -446,6 +446,7 @@ struct kvm_pmc {
	bool is_paused;
};

#define KVM_PMC_MAX_FIXED	3
struct kvm_pmu {
	unsigned nr_arch_gp_counters;
	unsigned nr_arch_fixed_counters;
@@ -462,7 +463,7 @@ struct kvm_pmu {
	u64 raw_event_mask;
	u8 version;
	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
	struct irq_work irq_work;
	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
 */

#define INTEL_PMC_MAX_GENERIC				       32
#define INTEL_PMC_MAX_FIXED					4
#define INTEL_PMC_MAX_FIXED				       16
#define INTEL_PMC_IDX_FIXED				       32

#define X86_PMC_IDX_MAX					       64
Loading