Commit 63e6053a authored by Linus Torvalds

Merge tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Fix Intel Alder Lake PEBS memory access latency & data source
   profiling info bugs.

 - Use Intel large-PEBS hardware feature in more circumstances, to
   reduce PMI overhead & reduce sampling data.

 - Extend the lost-sample profiling output with the PERF_FORMAT_LOST ABI
   variant, which tells tooling the exact number of samples lost (a short
   usage sketch follows this list).

 - Add new IBS register bit definitions.

 - AMD uncore events: Add PerfMonV2 DF (Data Fabric) enhancements.
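
   As a rough, illustrative sketch (not part of the pull request itself): a
   tool that opens a single non-group event via perf_event_open() with
   attr.read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST could pick up the
   lost-sample count as the last field of the read(2) layout. The struct and
   helper names below are made up for illustration, and uapi headers new
   enough to define PERF_FORMAT_LOST are assumed:

       #include <stdint.h>
       #include <stdio.h>
       #include <unistd.h>
       #include <linux/perf_event.h>

       /* read(2) layout for a non-group event with
        * read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST */
       struct lost_read_format {
               uint64_t value;  /* counter value */
               uint64_t id;     /* PERF_FORMAT_ID */
               uint64_t lost;   /* PERF_FORMAT_LOST: number of lost samples */
       };

       static void print_lost_samples(int perf_fd)
       {
               struct lost_read_format rf;

               if (read(perf_fd, &rf, sizeof(rf)) == sizeof(rf))
                       printf("lost samples: %llu\n",
                              (unsigned long long)rf.lost);
       }

   Here perf_fd is assumed to come from perf_event_open() with the
   read_format bits above set in the event attributes.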

* tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/ibs: Add new IBS register bits into header
  perf/x86/intel: Fix PEBS data source encoding for ADL
  perf/x86/intel: Fix PEBS memory access info encoding for ADL
  perf/core: Add a new read format to get a number of lost samples
  perf/x86/amd/uncore: Add PerfMonV2 RDPMC assignments
  perf/x86/amd/uncore: Add PerfMonV2 DF event format
  perf/x86/amd/uncore: Detect available DF counters
  perf/x86/amd/uncore: Use attr_update for format attributes
  perf/x86/amd/uncore: Use dynamic events array
  x86/events/intel/ds: Enable large PEBS for PERF_SAMPLE_WEIGHT_TYPE
parents 22a39c3d 326ecc15
+120 −26
@@ -21,7 +21,6 @@
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
#define MAX_COUNTERS		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10
@@ -31,6 +30,7 @@
#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;
static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
	u32 msr_base;
	cpumask_t *active_mask;
	struct pmu *pmu;
	struct perf_event *events[MAX_COUNTERS];
	struct perf_event **events;
	struct hlist_node node;
};

@@ -158,6 +158,16 @@ static int amd_uncore_add(struct perf_event *event, int flags)
	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;
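	/*
	 * Worked example (illustrative, not part of this change): with
	 * eight DF counters, hwc->idx 0-3 land on RDPMC index 6-9
	 * directly, while idx 4-7 skip the six L3 indices (10-15) and
	 * end up on RDPMC 16-19 via the NUM_COUNTERS_L3 adjustment above.
	 */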

	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore *uncore;
	struct hw_perf_event *hwc = &event->hw;
	u64 event_mask = AMD64_RAW_EVENT_MASK_NB;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (pmu_version >= 2 && is_nb_event(event))
		event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache.  On family 16h and below,
@@ -221,7 +235,7 @@ static int amd_uncore_event_init(struct perf_event *event)
	 * out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
	 */
	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
	hwc->config = event->attr.config & event_mask;
	hwc->idx = -1;

	if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3 */
DEFINE_UNCORE_FORMAT_ATTR(umask,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr, /* event14 if F17h+ */
	&format_attr_umask.attr,
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr, /* event8 if F17h+ */
	&format_attr_umask.attr,
	NULL, /* slicemask if F17h,	coreid if F19h */
	NULL, /* threadmask8 if F17h,	enallslices if F19h */
	NULL, /*			enallcores if F19h */
	NULL, /*			sliceid if F19h */
	NULL, /*			threadmask2 if F19h */
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static struct pmu amd_nb_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_l3_attr_groups,
	.attr_update	= amd_uncore_l3_attr_update,
	.name		= "amd_l2",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
			cpu_to_node(cpu));
}

static inline struct perf_event **
amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
{
	return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
			    cpu_to_node(cpu));
}

static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore_nb = NULL, *uncore_llc;
	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;

	if (amd_uncore_nb) {
		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
		uncore_nb = amd_uncore_alloc(cpu);
		if (!uncore_nb)
			goto fail;
@@ -384,11 +453,15 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
		uncore_nb->active_mask = &amd_nb_active_mask;
		uncore_nb->pmu = &amd_nb_pmu;
		uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
		if (!uncore_nb->events)
			goto fail;
		uncore_nb->id = -1;
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
	}

	if (amd_uncore_llc) {
		*per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
		uncore_llc = amd_uncore_alloc(cpu);
		if (!uncore_llc)
			goto fail;
@@ -398,6 +471,9 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
		uncore_llc->active_mask = &amd_llc_active_mask;
		uncore_llc->pmu = &amd_llc_pmu;
		uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
		if (!uncore_llc->events)
			goto fail;
		uncore_llc->id = -1;
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
	}
@@ -405,9 +481,16 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
	return 0;

fail:
	if (amd_uncore_nb)
		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
	if (uncore_nb) {
		kfree(uncore_nb->events);
		kfree(uncore_nb);
	}

	if (uncore_llc) {
		kfree(uncore_llc->events);
		kfree(uncore_llc);
	}

	return -ENOMEM;
}

@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
	if (cpu == uncore->cpu)
		cpumask_clear_cpu(cpu, uncore->active_mask);

	if (!--uncore->refcnt)
	if (!--uncore->refcnt) {
		kfree(uncore->events);
		kfree(uncore);
	}

	*per_cpu_ptr(uncores, cpu) = NULL;
}

@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	union cpuid_0x80000022_ebx ebx;
	int ret = -ENODEV;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@ static int __init amd_uncore_init(void)
	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	num_counters_nb	= NUM_COUNTERS_NB;
	num_counters_llc = NUM_COUNTERS_L2;
	if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@ static int __init amd_uncore_init(void)
	}

	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
		if (boot_cpu_data.x86 >= 0x17)
		if (pmu_version >= 2) {
			*df_attr++ = &format_attr_event14v2.attr;
			*df_attr++ = &format_attr_umask12.attr;
		} else if (boot_cpu_data.x86 >= 0x17) {
			*df_attr = &format_attr_event14.attr;
		}

		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@ static int __init amd_uncore_init(void)
		if (ret)
			goto fail_nb;

		if (pmu_version >= 2) {
			ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
			num_counters_nb = ebx.split.num_df_pmc;
		}

		pr_info("%d %s %s counters detected\n", num_counters_nb,
			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON" : "",
			amd_nb_pmu.name);
@@ -607,16 +706,11 @@ static int __init amd_uncore_init(void)
	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
		if (boot_cpu_data.x86 >= 0x19) {
			*l3_attr++ = &format_attr_event8.attr;
			*l3_attr++ = &format_attr_umask.attr;
			*l3_attr++ = &format_attr_coreid.attr;
			*l3_attr++ = &format_attr_enallslices.attr;
			*l3_attr++ = &format_attr_enallcores.attr;
			*l3_attr++ = &format_attr_sliceid.attr;
			*l3_attr++ = &format_attr_umask8.attr;
			*l3_attr++ = &format_attr_threadmask2.attr;
		} else if (boot_cpu_data.x86 >= 0x17) {
			*l3_attr++ = &format_attr_event8.attr;
			*l3_attr++ = &format_attr_umask.attr;
			*l3_attr++ = &format_attr_slicemask.attr;
			*l3_attr++ = &format_attr_umask8.attr;
			*l3_attr++ = &format_attr_threadmask8.attr;
		}

+4 −3
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
	struct event_constraint *c;

	c = intel_get_event_constraints(cpuc, idx, event);

	/*
	 * :ppp means to do reduced skid PEBS,
	 * which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
		return &counter0_constraint;
	}

	c = intel_get_event_constraints(cpuc, idx, event);

	return c;
}

@@ -6241,7 +6241,8 @@ __init int intel_pmu_init(void)
		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(false);
		intel_pmu_pebs_data_source_adl();
		x86_pmu.pebs_latency_data = adl_latency_data_small;
		x86_pmu.num_topdown_events = 8;
		x86_pmu.update_topdown_event = adl_update_topdown_event;
		x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
+86 −43
@@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void)
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
	data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
}

static void __init intel_pmu_pebs_data_source_grt(u64 *data_source)
{
	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
}

void __init intel_pmu_pebs_data_source_adl(void)
{
	u64 *data_source;

	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
	__intel_pmu_pebs_data_source_skl(false, data_source);

	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
	intel_pmu_pebs_data_source_grt(data_source);
}

static u64 precise_store_data(u64 status)
@@ -171,7 +196,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
	return dse.val;
}

static u64 load_latency_data(u64 status)
static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
{
	/*
	 * TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (tlb)
		*val |= P(TLB, MISS) | P(TLB, L2);
	else
		*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/* locked prefix */
	if (lock)
		*val |= P(LOCK, LOCKED);
}

/* Retrieve the latency data for e-core of ADL */
u64 adl_latency_data_small(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);

	dse.val = status;

	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

	/*
	 * For the atom core on ADL,
	 * bit 4: lock, bit 5: TLB access.
	 */
	pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);

	if (dse.ld_data_blk)
		val |= P(BLK, DATA);
	else
		val |= P(BLK, NA);

	return val;
}

static u64 load_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
@@ -181,7 +249,7 @@ static u64 load_latency_data(u64 status)
	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];
	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB, Lock infos
@@ -190,21 +258,8 @@ static u64 load_latency_data(u64 status)
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);
	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);

	/*
	 * Ice Lake and earlier models do not support block infos.
@@ -233,7 +288,7 @@ static u64 load_latency_data(u64 status)
	return val;
}

static u64 store_latency_data(u64 status)
static u64 store_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
@@ -243,23 +298,9 @@ static u64 store_latency_data(u64 status)
	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.st_lat_dse];
	val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.st_lat_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.st_lat_locked)
		val |= P(LOCK, LOCKED);
	pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);

	val |= P(BLK, NA);

@@ -781,8 +822,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {

struct event_constraint intel_grt_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
	INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
	EVENT_CONSTRAINT_END
};

@@ -1443,9 +1484,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
	bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
		val = load_latency_data(aux);
		val = load_latency_data(event, aux);
	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
		val = store_latency_data(aux);
		val = store_latency_data(event, aux);
	else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
		val = x86_pmu.pebs_latency_data(event, aux);
	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
		val = precise_datala_hsw(event, aux);
	else if (fst)
+16 −1
@@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
#define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */
#define PERF_X86_EVENT_PEBS_LAT_HYBRID	0x20000 /* ld and st lat for hybrid */

static inline bool is_topdown_count(struct perf_event *event)
{
@@ -136,7 +137,8 @@ struct amd_nb {
	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)
	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \
	PERF_SAMPLE_WEIGHT_TYPE)

#define PEBS_GP_REGS			\
	((1ULL << PERF_REG_X86_AX)    | \
@@ -460,6 +462,10 @@ struct cpu_hw_events {
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

#define INTEL_HYBRID_LAT_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)

/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -638,6 +644,8 @@ enum {
	x86_lbr_exclusive_max,
};

#define PERF_PEBS_DATA_SOURCE_MAX	0x10

struct x86_hybrid_pmu {
	struct pmu			pmu;
	const char			*name;
@@ -665,6 +673,8 @@ struct x86_hybrid_pmu {
	unsigned int			late_ack	:1,
					mid_ack		:1,
					enabled_ack	:1;

	u64				pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};

static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -825,6 +835,7 @@ struct x86_pmu {
	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
	struct event_constraint *pebs_constraints;
	void		(*pebs_aliases)(struct perf_event *event);
	u64		(*pebs_latency_data)(struct perf_event *event, u64 status);
	unsigned long	large_pebs_flags;
	u64		rtm_abort_event;

@@ -1392,6 +1403,8 @@ void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

u64 adl_latency_data_small(struct perf_event *event, u64 status);

extern struct event_constraint intel_core2_pebs_event_constraints[];

extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1499,6 +1512,8 @@ void intel_pmu_pebs_data_source_nhm(void);

void intel_pmu_pebs_data_source_skl(bool pmem);

void intel_pmu_pebs_data_source_adl(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);
+10 −6
@@ -29,7 +29,10 @@ union ibs_fetch_ctl {
			rand_en:1,	/* 57: random tagging enable */
			fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
					 *      (needs IbsFetchComp) */
			reserved:5;	/* 59-63: reserved */
			l3_miss_only:1,	/* 59: Collect L3 miss samples only */
			fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
			fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
			reserved:2;	/* 62-63: reserved */
	};
};

@@ -38,14 +41,14 @@ union ibs_op_ctl {
	__u64 val;
	struct {
		__u64	opmaxcnt:16,	/* 0-15: periodic op max. count */
			reserved0:1,	/* 16: reserved */
			l3_miss_only:1,	/* 16: Collect L3 miss samples only */
			op_en:1,	/* 17: op sampling enable */
			op_val:1,	/* 18: op sample valid */
			cnt_ctl:1,	/* 19: periodic op counter control */
			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
			reserved1:5,	/* 27-31: reserved */
			reserved0:5,	/* 27-31: reserved */
			opcurcnt:27,	/* 32-58: periodic op counter current count */
			reserved2:5;	/* 59-63: reserved */
			reserved1:5;	/* 59-63: reserved */
	};
};

@@ -71,11 +74,12 @@ union ibs_op_data {
union ibs_op_data2 {
	__u64 val;
	struct {
		__u64	data_src:3,	/* 0-2: data source */
		__u64	data_src_lo:3,	/* 0-2: data source low */
			reserved0:1,	/* 3: reserved */
			rmt_node:1,	/* 4: destination node */
			cache_hit_st:1,	/* 5: cache hit state */
			reserved1:57;	/* 5-63: reserved */
			data_src_hi:2,	/* 6-7: data source high */
			reserved1:56;	/* 8-63: reserved */
	};
};
