Unverified commit 50c172c5, authored by openeuler-ci-bot and committed by Gitee

!226 Intel SPR: Adding core PMU support for OLK-5.10

Merge Pull Request from: @allen-shi 
 
This is a cherry-pick of [PR62](https://gitee.com/openeuler/kernel/pulls/62) from the openEuler-22.09 branch.


This patch set enables Intel Sapphire Rapids (SPR) core PMU support.
The first five patches add the core PMU support; the last three fix
bugs introduced by those five.

 **Intel Kernel Issue** 
[#I596BF](https://gitee.com/openeuler/intel-kernel/issues/I596BF)

 **Test:** 
With this patch set:

```
        # perf stat -e L1-dcache-load-misses -- sleep 1
        
         Performance counter stats for 'sleep 1':
        
                    10,134      L1-dcache-load-misses
        
               1.000727264 seconds time elapsed
        
               0.000584000 seconds user
               0.000000000 seconds sys
```

Without this patch set, the event is reported as "not supported":

```
        # perf stat -e L1-dcache-load-misses -- sleep 1
        
         Performance counter stats for 'sleep 1':
        
           <not supported>      L1-dcache-load-misses
        
               1.001434664 seconds time elapsed
        
               0.000000000 seconds user
               0.001318000 seconds sys
```

Some other tests:

```
# perf mem record -- sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.030 MB perf.data (24 samples) ]
[root@spr4s01 linux]# perf script --fields comm,weight,ip,sym
       perf-exec                0 ffffffffaf80cb0b __intel_pmu_enable_all.constprop.0
       perf-exec              489 ffffffffafdfa612 acpi_os_read_memory
       perf-exec              359 ffffffffafa67daa perf_iterate_ctx
       perf-exec              435 ffffffffafa67daa perf_iterate_ctx
       perf-exec              123 ffffffffafa6d0eb perf_event_addr_filters_exec
       perf-exec              168 ffffffffafa67daa perf_iterate_ctx
           sleep              163 ffffffffafa67d72 perf_iterate_ctx
           sleep              174 ffffffffafa67d72 perf_iterate_ctx
           sleep               83 ffffffffafa67d92 perf_iterate_ctx
           sleep              159 ffffffffafa67d7c perf_iterate_ctx
           sleep              133 ffffffffafa67d7c perf_iterate_ctx
           sleep                0 ffffffffafa886aa filemap_map_pages
           sleep                0 ffffffffaf80cb0b __intel_pmu_enable_all.constprop.0
           sleep              184 ffffffffafa67d92 perf_iterate_ctx
           sleep                0 ffffffffb025029e sync_regs
           sleep                0 ffffffffafa886aa filemap_map_pages
           sleep                0     7ffb7f2b7ba0 [unknown]
           sleep                0     7ffb7f2b7cd1 [unknown]
           sleep              150 ffffffffafa67d92 perf_iterate_ctx
           sleep                0 ffffffffafa886aa filemap_map_pages
           sleep                0 ffffffffafae8a37 page_remove_file_rmap
           sleep                0 ffffffffafad1c2f zap_pte_range
           sleep                0 ffffffffafad1c2f zap_pte_range
           sleep                0 ffffffffafaec00c unlink_anon_vmas

# perf record -e cpu/event=0xcd,umask=0x1/ -e cpu/event=0x03,umask=0x82/ -- cat /proc/cpuinfo
    ...
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.029 MB perf.data (175 samples) ]
# perf script|head -5
             cat 254544 10879.930447:          1 cpu/event=0x03,umask=0x82/:  ffffffffa7653768 memcpy_erms+0x8 (vmlinux)
             cat 254544 10879.930511:          1 cpu/event=0x03,umask=0x82/:  ffffffffa6cda323 vm_area_dup+0x63 (vmlinux)
             cat 254544 10879.930513:          2 cpu/event=0x03,umask=0x82/:  ffffffffa6edf233 __split_vma+0xa3 (vmlinux)
             cat 254544 10879.930564:         32 cpu/event=0x03,umask=0x82/:  ffffffffa6e8b2a7 filemap_fault+0x2d7 (vmlinux)
             cat 254544 10879.930656:         48 cpu/event=0x03,umask=0x82/:  ffffffffa76504de sync_regs+0x1e (vmlinux)

        
# perf record -e instructions:ppp -a -- true
[ perf record: Woken up 0 times to write data ]
[ perf record: Captured and wrote 0.775 MB perf.data (3142 samples) ]
# perf script|head -5
            perf 254537 [000] 10812.672089:          1 instructions:ppp:  ffffffffa764f883 do_syscall_64+0x33 (vmlinux)
            perf 254537 [000] 10812.672101:          1 instructions:ppp:  ffffffffa6c262cd nmi_handle+0x6d (vmlinux)
            perf 254537 [000] 10812.672103:          4 instructions:ppp:  ffffffffa6c2bcb5 native_sched_clock+0x5 (vmlinux)
            perf 254537 [000] 10812.672104:         74 instructions:ppp:  ffffffffa6c2bcb5 native_sched_clock+0x5 (vmlinux)
            perf 254537 [000] 10812.672105:       1779 instructions:ppp:  ffffffffa6c2bcb5 native_sched_clock+0x5 (vmlinux)

# perf record -d --data-page-size -e major-faults -a -- sleep 1
Error:
Asking for the data page size isn't supported by this kernel.

# perf record -d -e major-faults -a -- sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.497 MB perf.data (3 samples) ]
# perf script
            perf 25239 [447]   660.473507:          1 major-faults:            5c6d74 perf_mmap__consume+0x24 (/usr/bin/perf)
            perf 25239 [447]   660.473876:          1 major-faults:            5c6d74 perf_mmap__consume+0x24 (/usr/bin/perf)
            perf 25239 [447]   660.474401:          1 major-faults:            5c6d74 perf_mmap__consume+0x24 (/usr/bin/perf)
# perf script --fields comm,addr
            perf     7fe0459a3408
            perf     7fe03ec0d408
            perf     7fe0333de408
```

 **Known Issue:** 
N/A

 **Default Config Change:** 
N/A 
 
Link: https://gitee.com/openeuler/kernel/pulls/226

 
Reviewed-by: Chen Wei <chenwei@xfusion.com>
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 188b2ee0 de1c4296
+1 −1
@@ -2215,7 +2215,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,

		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
						ppmu->get_mem_weight)
			ppmu->get_mem_weight(&data.weight);
			ppmu->get_mem_weight(&data.weight.full);

		if (perf_event_overflow(event, &data, regs))
			power_pmu_stop(event, 0);
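This one-line powerpc fix tracks a core perf change that this series backports: `perf_sample_data::weight` is no longer a plain u64 but a union, so callers have to write the `full` member. A minimal sketch of that union, assuming the backport matches the mainline definition in include/uapi/linux/perf_event.h:

```
/*
 * Sketch of union perf_sample_weight (assumed to match mainline).
 * PERF_SAMPLE_WEIGHT keeps using the full 64-bit value, while
 * PERF_SAMPLE_WEIGHT_STRUCT lets SPR report two latencies at once.
 */
union perf_sample_weight {
	__u64	full;			/* legacy PERF_SAMPLE_WEIGHT value */
	struct {
		__u32	var1_dw;	/* e.g. cache latency on SPR */
		__u16	var2_w;		/* e.g. instruction latency on SPR */
		__u16	var3_w;		/* reserved */
	};				/* little-endian layout */
};
```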
+7 −1
@@ -255,6 +255,8 @@ static bool check_hw_exists(void)
		if (ret)
			goto msr_fail;
		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
			if (fixed_counter_disabled(i))
				continue;
			if (val & (0x03 << i*4)) {
				bios_fail = 1;
				val_fail = val;
@@ -1534,6 +1536,8 @@ void perf_event_print_debug(void)
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		if (fixed_counter_disabled(idx))
			continue;
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -2006,7 +2010,9 @@ static int __init init_hw_perf_events(void)
	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
	pr_info("... fixed-purpose events:   %lu\n",
			hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1)
					<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl));
	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);

	if (!x86_pmu.read)
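The boot banner now counts only the fixed counters whose enable bits survive in `x86_pmu.intel_ctrl`, instead of printing the raw `num_counters_fixed`. A worked example with hypothetical values:

```
/*
 * Hypothetical CPU: num_counters_fixed = 4, INTEL_PMC_IDX_FIXED = 32,
 * but CPUID leaves fixed counter 3 disabled, so intel_ctrl only has
 * bits 32-34 set. The candidate mask 0xf00000000 ANDed with intel_ctrl
 * keeps three bits, and hweight64() prints 3 rather than 4.
 */
u64 mask = (((1ULL << x86_pmu.num_counters_fixed) - 1)
		<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl;
```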
+355 −23
@@ -276,6 +276,57 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
	EVENT_EXTRA_END
};

static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
	EVENT_EXTRA_END
};

static struct event_constraint intel_spr_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),

	INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
	INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
	/*
	 * Generally event codes < 0x90 are restricted to counters 0-3.
	 * The 0x2E and 0x3C are exceptions, which have no restriction.
	 */
	INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),

	INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
	INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
	INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
	INTEL_EVENT_CONSTRAINT(0xce, 0x1),
	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
	/*
	 * Generally event codes >= 0x90 are likely to have no restrictions.
	 * The exceptions are defined above.
	 */
	INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),

	EVENT_CONSTRAINT_END
};


EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
@@ -320,6 +371,10 @@ EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec,	td_bad_spec,		"event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound,	td_fe_bound,		"event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound,	td_be_bound,		"event=0x00,umask=0x83");
EVENT_ATTR_STR(topdown-heavy-ops,	td_heavy_ops,		"event=0x00,umask=0x84");
EVENT_ATTR_STR(topdown-br-mispredict,	td_br_mispredict,	"event=0x00,umask=0x85");
EVENT_ATTR_STR(topdown-fetch-lat,	td_fetch_lat,		"event=0x00,umask=0x86");
EVENT_ATTR_STR(topdown-mem-bound,	td_mem_bound,		"event=0x00,umask=0x87");

static struct attribute *snb_events_attrs[] = {
	EVENT_PTR(td_slots_issued),
@@ -385,6 +440,108 @@ static u64 intel_pmu_event_map(int hw_event)
	return intel_perfmon_event_map[hw_event];
}

static __initconst const u64 spr_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,
		[ C(RESULT_MISS)   ] = 0xe124,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_MISS)   ] = 0xe424,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,
		[ C(RESULT_MISS)   ] = 0xe12,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,
		[ C(RESULT_MISS)   ] = 0xe13,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = 0xe11,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4c4,
		[ C(RESULT_MISS)   ] = 0x4c5,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
 },
};

static __initconst const u64 spr_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x10001,
		[ C(RESULT_MISS)   ] = 0x3fbfc00001,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
		[ C(RESULT_MISS)   ] = 0x3f3fc00002,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x10c000001,
		[ C(RESULT_MISS)   ] = 0x3fb3000001,
	},
 },
};

/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
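These tables are what make the `perf stat -e L1-dcache-load-misses` test above work: the generic L1D read-miss slot now carries a raw SPR encoding. A hypothetical decode of that entry, assuming the usual x86 config layout (event in bits 0-7, umask in bits 8-15):

```
u64 cfg = spr_hw_cache_event_ids[C(L1D)][C(OP_READ)][C(RESULT_MISS)];

u8 event = cfg & 0xff;		/* 0x24 */
u8 umask = (cfg >> 8) & 0xff;	/* 0xe1 */
```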
@@ -2325,8 +2482,8 @@ static void __icl_update_topdown_event(struct perf_event *event,
	}
}

static void update_saved_topdown_regs(struct perf_event *event,
				      u64 slots, u64 metrics)
static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
				      u64 metrics, int metric_end)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *other;
@@ -2335,7 +2492,7 @@ static void update_saved_topdown_regs(struct perf_event *event,
	event->hw.saved_slots = slots;
	event->hw.saved_metric = metrics;

	for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
		if (!is_topdown_idx(idx))
			continue;
		other = cpuc->events[idx];
@@ -2350,7 +2507,8 @@ static void update_saved_topdown_regs(struct perf_event *event,
 * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
 * modified by an NMI. The PMU has to be disabled before calling this function.
 */
static u64 icl_update_topdown_event(struct perf_event *event)

static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *other;
@@ -2366,7 +2524,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
	/* read PERF_METRICS */
	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);

	for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
		if (!is_topdown_idx(idx))
			continue;
		other = cpuc->events[idx];
@@ -2392,7 +2550,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
		 * Don't need to reset the PERF_METRICS and Fixed counter 3.
		 * Because the values will be restored in next schedule in.
		 */
		update_saved_topdown_regs(event, slots, metrics);
		update_saved_topdown_regs(event, slots, metrics, metric_end);
		reset = false;
	}

@@ -2401,12 +2559,18 @@ static u64 icl_update_topdown_event(struct perf_event *event)
		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
		wrmsrl(MSR_PERF_METRICS, 0);
		if (event)
			update_saved_topdown_regs(event, 0, 0);
			update_saved_topdown_regs(event, 0, 0, metric_end);
	}

	return slots;
}

static u64 icl_update_topdown_event(struct perf_event *event)
{
	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
						 x86_pmu.num_topdown_events - 1);
}

static void intel_pmu_read_topdown_event(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
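`icl_update_topdown_event()` becomes a thin wrapper that derives the last metric index from `num_topdown_events`, so one update path serves both Ice Lake and SPR. Worked values, assuming the mainline `INTEL_PMC_IDX_METRIC_BASE` of 48:

```
/*
 * Ice Lake (num_topdown_events = 4): metric_end = 48 + 4 - 1 = 51,
 * i.e. INTEL_PMC_IDX_TD_BE_BOUND, matching the old hard-coded bound.
 * SPR      (num_topdown_events = 8): metric_end = 48 + 8 - 1 = 55,
 * covering the four new metrics up to INTEL_PMC_IDX_TD_MEM_BOUND.
 */
int metric_end = INTEL_PMC_IDX_METRIC_BASE + x86_pmu.num_topdown_events - 1;
```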
@@ -2561,8 +2725,11 @@ static void intel_pmu_reset(void)
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		if (fixed_counter_disabled(idx))
			continue;
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
	}

	if (ds)
		ds->bts_index = ds->bts_buffer_base;
@@ -3556,6 +3723,26 @@ static int core_pmu_hw_config(struct perf_event *event)
	return intel_pmu_bts_config(event);
}

#define INTEL_TD_METRIC_AVAILABLE_MAX	(INTEL_TD_METRIC_RETIRING + \
					 ((x86_pmu.num_topdown_events - 1) << 8))

static bool is_available_metric_event(struct perf_event *event)
{
	return is_metric_event(event) &&
		event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
}

static inline bool is_mem_loads_event(struct perf_event *event)
{
	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
}

static inline bool is_mem_loads_aux_event(struct perf_event *event)
{
	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
}


static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);
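`is_available_metric_event()` caps the accepted topdown configs by the per-model event count. Worked values, assuming the mainline `INTEL_TD_METRIC_RETIRING` of 0x8000:

```
u64 heavy_ops = 0x8400;	/* topdown-heavy-ops: event=0x00, umask=0x84 */
/* Ice Lake (4 events): MAX = 0x8000 + (3 << 8) = 0x8300 -> rejected */
/* SPR      (8 events): MAX = 0x8000 + (7 << 8) = 0x8700 -> accepted */
```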
@@ -3635,7 +3822,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
		if (event->attr.config & X86_ALL_EVENT_FLAGS)
			return -EINVAL;

		if (is_metric_event(event)) {
		if (is_available_metric_event(event)) {
			struct perf_event *leader = event->group_leader;

			/* The metric events don't support sampling. */
@@ -3664,6 +3851,33 @@ static int intel_pmu_hw_config(struct perf_event *event)
		}
	}

	/*
	 * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
	 * doesn't function quite right. As a work-around it needs to always be
	 * co-scheduled with an auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
	 * The actual count of this second event is irrelevant; it just needs
	 * to be active to make the first event function correctly.
	 *
	 * In a group, the auxiliary event must come before the load latency
	 * event. This rule keeps the check simple, because perf may not yet
	 * see the complete group at this point.
	 */
	if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX &&
	    (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
	    is_mem_loads_event(event)) {
		struct perf_event *leader = event->group_leader;
		struct perf_event *sibling = NULL;

		if (!is_mem_loads_aux_event(leader)) {
			for_each_sibling_event(sibling, leader) {
				if (is_mem_loads_aux_event(sibling))
					break;
			}
			if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
				return -ENODATA;
		}
	}

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;
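A minimal userspace sketch, not part of the patch set, of a group that satisfies this check: the auxiliary event leads and the load-latency event follows as its sibling. The helper name and sample period below are made up; with a new enough perf tool the equivalent is `perf record -e '{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=3/}'`.

```
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_spr_mem_loads_group(void)
{
	struct perf_event_attr aux = {0}, ld = {0};
	int leader;

	aux.type = PERF_TYPE_RAW;
	aux.size = sizeof(aux);
	aux.config = 0x8203;	/* event=0x03, umask=0x82: mem-loads-aux */

	/* the auxiliary event must be the group leader */
	leader = syscall(__NR_perf_event_open, &aux, 0, -1, -1, 0);
	if (leader < 0)
		return -1;

	ld.type = PERF_TYPE_RAW;
	ld.size = sizeof(ld);
	ld.config = 0x1cd;	/* event=0xcd, umask=0x01: mem-loads */
	ld.config1 = 3;		/* ldlat threshold */
	ld.sample_period = 1009;
	ld.sample_type = PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_WEIGHT;
	ld.precise_ip = 2;

	/* opened behind the aux leader; standalone it would fail
	 * the check above with -ENODATA */
	return syscall(__NR_perf_event_open, &ld, 0, -1, -1, leader);
}
```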

@@ -3863,6 +4077,31 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	return hsw_get_event_constraints(cpuc, idx, event);
}

static struct event_constraint *
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct event_constraint *c;

	c = icl_get_event_constraints(cpuc, idx, event);

	/*
	 * The :ppp indicates the Precise Distribution (PDist) facility, which
	 * is only supported on GP counter 0. If a :ppp event is not
	 * available on GP counter 0, error out.
	 * Exception: Instruction PDIR is only available on the fixed counter 0.
	 */
	if ((event->attr.precise_ip == 3) &&
	    !constraint_match(&fixed0_constraint, event->hw.config)) {
		if (c->idxmsk64 & BIT_ULL(0))
			return &counter0_constraint;

		return &emptyconstraint;
	}

	return c;
}

static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
@@ -3952,6 +4191,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left)
	return max(left, 32ULL);
}

static u64 spr_limit_period(struct perf_event *event, u64 left)
{
	if (event->attr.precise_ip == 3)
		return max(left, 128ULL);

	return left;
}

PMU_FORMAT_ATTR(event,	"config:0-7"	);
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -4718,6 +4965,42 @@ static struct attribute *icl_tsx_events_attrs[] = {
	NULL,
};


EVENT_ATTR_STR(mem-stores,	mem_st_spr,	"event=0xcd,umask=0x2");
EVENT_ATTR_STR(mem-loads-aux,	mem_ld_aux,	"event=0x03,umask=0x82");

static struct attribute *spr_events_attrs[] = {
	EVENT_PTR(mem_ld_hsw),
	EVENT_PTR(mem_st_spr),
	EVENT_PTR(mem_ld_aux),
	NULL,
};

static struct attribute *spr_td_events_attrs[] = {
	EVENT_PTR(slots),
	EVENT_PTR(td_retiring),
	EVENT_PTR(td_bad_spec),
	EVENT_PTR(td_fe_bound),
	EVENT_PTR(td_be_bound),
	EVENT_PTR(td_heavy_ops),
	EVENT_PTR(td_br_mispredict),
	EVENT_PTR(td_fetch_lat),
	EVENT_PTR(td_mem_bound),
	NULL,
};

static struct attribute *spr_tsx_events_attrs[] = {
	EVENT_PTR(tx_start),
	EVENT_PTR(tx_abort),
	EVENT_PTR(tx_commit),
	EVENT_PTR(tx_capacity_read),
	EVENT_PTR(tx_capacity_write),
	EVENT_PTR(tx_conflict),
	EVENT_PTR(cycles_t),
	EVENT_PTR(cycles_ct),
	NULL,
};

static ssize_t freeze_on_smi_show(struct device *cdev,
				  struct device_attribute *attr,
				  char *buf)
@@ -4941,7 +5224,7 @@ __init int intel_pmu_init(void)
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	struct event_constraint *c;
	unsigned int unused;
	unsigned int fixed_mask;
	struct extra_reg *er;
	bool pmem = false;
	int version, i;
@@ -4963,7 +5246,7 @@ __init int intel_pmu_init(void)
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
	cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
		return -ENODEV;

@@ -4987,12 +5270,15 @@ __init int intel_pmu_init(void)
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events, when not running in a hypervisor:
	 */
	if (version > 1) {
	if (version > 1 && version < 5) {
		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);

		x86_pmu.num_counters_fixed =
			max((int)edx.split.num_counters_fixed, assume);
	}

		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
	} else if (version >= 5)
		x86_pmu.num_counters_fixed = fls(fixed_mask);

	if (version >= 4)
		x86_pmu.counter_freezing = !disable_counter_freezing;
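From architectural perfmon v5 on, CPUID.0xA ECX is a bitmap of supported fixed counters, so the previously discarded output is captured as `fixed_mask` and enumerated directly rather than assumed. A hypothetical example:

```
unsigned int fixed_mask = 0xf;	/* hypothetical CPUID.0xA ECX: counters 0-3 */

if (version >= 5)
	x86_pmu.num_counters_fixed = fls(fixed_mask);	/* -> 4 */
```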
@@ -5496,12 +5782,50 @@ __init int intel_pmu_init(void)
		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(pmem);
		x86_pmu.num_topdown_events = 4;
		x86_pmu.update_topdown_event = icl_update_topdown_event;
		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
		pr_cont("Icelake events, ");
		name = "icelake";
		break;

	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		pmem = true;
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		x86_pmu.event_constraints = intel_spr_event_constraints;
		x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
		x86_pmu.extra_regs = intel_spr_extra_regs;
		x86_pmu.limit_period = spr_limit_period;
		x86_pmu.pebs_aliases = NULL;
		x86_pmu.pebs_prec_dist = true;
		x86_pmu.pebs_block = true;
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
		x86_pmu.flags |= PMU_FL_PEBS_ALL;
		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = spr_get_event_constraints;
		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
			hsw_format_attr : nhm_format_attr;
		extra_skl_attr = skl_format_attr;
		mem_attr = spr_events_attrs;
		td_attr = spr_td_events_attrs;
		tsx_attr = spr_tsx_events_attrs;
		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(pmem);
		x86_pmu.num_topdown_events = 8;
		x86_pmu.update_topdown_event = icl_update_topdown_event;
		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
		pr_cont("Sapphire Rapids events, ");
		name = "sapphire_rapids";
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
@@ -5544,8 +5868,7 @@ __init int intel_pmu_init(void)
		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
	x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED;

	/* AnyThread may be deprecated on arch perfmon v5 or later */
	if (x86_pmu.intel_cap.anythread_deprecated)
@@ -5562,12 +5885,21 @@ __init int intel_pmu_init(void)
			 * events to the generic counters.
			 */
			if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
				/*
				 * Disable topdown slots and metrics events,
				 * if slots event is not in CPUID.
				 */
				if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl))
					c->idxmsk64 = 0;
				c->weight = hweight64(c->idxmsk64);
				continue;
			}

			if (c->cmask == FIXED_EVENT_FLAGS
			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
			if (c->cmask == FIXED_EVENT_FLAGS) {
				/* Disable fixed counters that are not in CPUID */
				c->idxmsk64 &= x86_pmu.intel_ctrl;

				if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
					c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			}
			c->idxmsk64 &=
+121 −10
@@ -36,7 +36,9 @@ union intel_x86_pebs_dse {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_reserved:26;
		unsigned int ld_data_blk:1;
		unsigned int ld_addr_blk:1;
		unsigned int ld_reserved:24;
	};
	struct {
		unsigned int st_l1d_hit:1;
@@ -45,6 +47,12 @@ union intel_x86_pebs_dse {
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
	struct {
		unsigned int st_lat_dse:4;
		unsigned int st_lat_stlb_miss:1;
		unsigned int st_lat_locked:1;
		unsigned int ld_reserved3:26;
	};
};


@@ -198,6 +206,63 @@ static u64 load_latency_data(u64 status)
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	/*
	 * Ice Lake and earlier models do not support block infos.
	 */
	if (!x86_pmu.pebs_block) {
		val |= P(BLK, NA);
		return val;
	}
	/*
	 * bit 6: load was blocked since its data could not be forwarded
	 *        from a preceding store
	 */
	if (dse.ld_data_blk)
		val |= P(BLK, DATA);

	/*
	 * bit 7: load was blocked due to potential address conflict with
	 *        a preceding store
	 */
	if (dse.ld_addr_blk)
		val |= P(BLK, ADDR);

	if (!dse.ld_data_blk && !dse.ld_addr_blk)
		val |= P(BLK, NA);

	return val;
}

static u64 store_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/*
	 * use the mapping table for bits 0-3
	 */
	val = pebs_data_source[dse.st_lat_dse];

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.st_lat_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.st_lat_locked)
		val |= P(LOCK, LOCKED);

	val |= P(BLK, NA);

	return val;
}
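The two new load bits reuse the head of the old `ld_reserved` field; pre-SPR CPUs never set them and take the `!x86_pmu.pebs_block` early-out above. A hypothetical decode with this union:

```
union intel_x86_pebs_dse dse;

dse.val = 0x55;	/* hypothetical raw PEBS data-source value */
/*
 * ld_dse = 0x5 (bits 0-3), ld_stlb_miss = 1 (bit 4),
 * ld_locked = 0 (bit 5), ld_data_blk = 1 (bit 6):
 * the load missed the STLB and was blocked on store data.
 */
```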

@@ -870,6 +935,28 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_spr_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),

	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
	INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),

	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),

	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),

	/*
	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
	 * need the full constraints from the main table.
	 */

	EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;
@@ -960,7 +1047,8 @@ static void adaptive_pebs_record_size_update(void)
}

#define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
				PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
				PERF_SAMPLE_PHYS_ADDR |			     \
				PERF_SAMPLE_WEIGHT_TYPE |		     \
				PERF_SAMPLE_TRANSACTION)

static u64 pebs_update_adaptive_cfg(struct perf_event *event)
@@ -986,7 +1074,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
	gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
	       (attr->sample_regs_intr & PEBS_GP_REGS);

	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
		      x86_pmu.rtm_abort_event);

@@ -1330,6 +1418,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)

	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
		val = load_latency_data(aux);
	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
		val = store_latency_data(aux);
	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
		val = precise_datala_hsw(event, aux);
	else if (fst)
@@ -1364,8 +1454,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data->weight = pebs->lat;
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
		data->weight.full = pebs->lat;

	/*
	 * data.data_src encodes the data source
@@ -1457,8 +1547,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
@@ -1502,6 +1592,9 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#endif
}

#define PEBS_LATENCY_MASK			0xffff
#define PEBS_CACHE_LATENCY_OFFSET		32

/*
 * With adaptive PEBS the layout depends on what fields are configured.
 */
@@ -1572,9 +1665,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
	}

	if (format_size & PEBS_DATACFG_MEMINFO) {
		if (sample_type & PERF_SAMPLE_WEIGHT)
			data->weight = meminfo->latency ?:
		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
			u64 weight = meminfo->latency;

			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
				data->weight.var2_w = weight & PEBS_LATENCY_MASK;
				weight >>= PEBS_CACHE_LATENCY_OFFSET;
			}

			/*
			 * Although meminfo::latency is defined as a u64,
			 * only the lower 32 bits include the valid data
			 * in practice on Ice Lake and earlier platforms.
			 */
			if (sample_type & PERF_SAMPLE_WEIGHT) {
				data->weight.full = weight ?:
					intel_get_tsx_weight(meminfo->tsx_tuning);
			} else {
				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
					intel_get_tsx_weight(meminfo->tsx_tuning);
			}
		}

		if (sample_type & PERF_SAMPLE_DATA_SRC)
			data->data_src.val = get_data_src(event, meminfo->aux);
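On SPR (`PMU_FL_INSTR_LATENCY` set), the PEBS Memory Info Record packs two latencies into `meminfo::latency`: instruction latency in the low 16 bits and cache latency from bit 32. A hypothetical sample value and how the code above splits it:

```
u64 latency = (0x180ULL << PEBS_CACHE_LATENCY_OFFSET) | 0x20;

u16 instr_lat = latency & PEBS_LATENCY_MASK;		/* 0x20  -> weight.var2_w */
u64 cache_lat = latency >> PEBS_CACHE_LATENCY_OFFSET;	/* 0x180 -> weight.full or
							   weight.var1_dw */
```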
+17 −1
@@ -80,6 +80,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PAIR		0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT	0x2000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN		0x4000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT	0x8000 /* st+stlat data address sampling */

static inline bool is_topdown_count(struct perf_event *event)
{
@@ -445,6 +446,10 @@ struct cpu_hw_events {
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

#define INTEL_PSD_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)

#define INTEL_PST_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
@@ -726,7 +731,8 @@ struct x86_pmu {
			pebs_broken		:1,
			pebs_prec_dist		:1,
			pebs_no_tlb		:1,
			pebs_no_isolation	:1;
			pebs_no_isolation	:1,
			pebs_block		:1;
	int		pebs_record_size;
	int		pebs_buffer_size;
	int		max_pebs_events;
@@ -778,6 +784,7 @@ struct x86_pmu {
	/*
	 * Intel perf metrics
	 */
	int		num_topdown_events;
	u64		(*update_topdown_event)(struct perf_event *event);
	int		(*set_topdown_event_period)(struct perf_event *event);

@@ -873,6 +880,8 @@ do { \
#define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
#define PMU_FL_TFA		0x20 /* deal with TSX force abort */
#define PMU_FL_PAIR		0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */

#define EVENT_VAR(_id)  event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1063,6 +1072,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
			  char *page);

static inline bool fixed_counter_disabled(int i)
{
	return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}

#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);
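`fixed_counter_disabled()` backs the loops patched in the x86 core and in intel_pmu_reset() above. Note that it treats counter `i` as disabled only when no enable bit survives at or above its index. A worked example with a hypothetical `intel_ctrl`:

```
/* Hypothetical: only fixed counters 0-2 enabled,
 * so intel_ctrl >> INTEL_PMC_IDX_FIXED == 0x7. */
fixed_counter_disabled(2);	/* !(0x7 >> 2) -> !(0x1) -> false */
fixed_counter_disabled(3);	/* !(0x7 >> 3) -> !(0x0) -> true  */
```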
@@ -1162,6 +1176,8 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];

extern struct event_constraint intel_icl_pebs_event_constraints[];

extern struct event_constraint intel_spr_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_add(struct perf_event *event);