Commit 63e6053a authored by Linus Torvalds

Merge tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Fix Intel Alder Lake PEBS memory access latency & data source
   profiling info bugs.

 - Use Intel large-PEBS hardware feature in more circumstances, to
   reduce PMI overhead & reduce sampling data.

 - Extend the lost-sample profiling output with the PERF_FORMAT_LOST ABI
   variant, which tells tooling the exact number of samples lost (a short
   usage sketch follows this list).

 - Add new IBS register bit definitions.

 - AMD uncore events: Add PerfMonV2 DF (Data Fabric) enhancements.
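
   As a rough, illustrative sketch (not part of the pull request itself): a
   tool that opens a single non-group event via perf_event_open() with
   attr.read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST could pick up the
   lost-sample count as the last field of the read(2) layout. The struct and
   helper names below are made up for illustration, and uapi headers new
   enough to define PERF_FORMAT_LOST are assumed:

       #include <stdint.h>
       #include <stdio.h>
       #include <unistd.h>
       #include <linux/perf_event.h>

       /* read(2) layout for a non-group event with
        * read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST */
       struct lost_read_format {
               uint64_t value;  /* counter value */
               uint64_t id;     /* PERF_FORMAT_ID */
               uint64_t lost;   /* PERF_FORMAT_LOST: number of lost samples */
       };

       static void print_lost_samples(int perf_fd)
       {
               struct lost_read_format rf;

               if (read(perf_fd, &rf, sizeof(rf)) == sizeof(rf))
                       printf("lost samples: %llu\n",
                              (unsigned long long)rf.lost);
       }

   Here perf_fd is assumed to come from perf_event_open() with the
   read_format bits above set in the event attributes.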

* tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/ibs: Add new IBS register bits into header
  perf/x86/intel: Fix PEBS data source encoding for ADL
  perf/x86/intel: Fix PEBS memory access info encoding for ADL
  perf/core: Add a new read format to get a number of lost samples
  perf/x86/amd/uncore: Add PerfMonV2 RDPMC assignments
  perf/x86/amd/uncore: Add PerfMonV2 DF event format
  perf/x86/amd/uncore: Detect available DF counters
  perf/x86/amd/uncore: Use attr_update for format attributes
  perf/x86/amd/uncore: Use dynamic events array
  x86/events/intel/ds: Enable large PEBS for PERF_SAMPLE_WEIGHT_TYPE
parents 22a39c3d 326ecc15
+120 −26
@@ -21,7 +21,6 @@
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
#define MAX_COUNTERS		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10
@@ -31,6 +30,7 @@
#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;
static int num_counters_llc;
static int num_counters_nb;
static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
	u32 msr_base;
	cpumask_t *active_mask;
	struct pmu *pmu;
	struct perf_event *events[MAX_COUNTERS];
	struct perf_event **events;
	struct hlist_node node;
};

@@ -158,6 +158,16 @@ static int amd_uncore_add(struct perf_event *event, int flags)
	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;
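	/*
	 * Worked example (illustrative, not part of this change): with
	 * eight DF counters, hwc->idx 0-3 land on RDPMC index 6-9
	 * directly, while idx 4-7 skip the six L3 indices (10-15) and
	 * end up on RDPMC 16-19 via the NUM_COUNTERS_L3 adjustment above.
	 */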

	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore *uncore;
	struct hw_perf_event *hwc = &event->hw;
	u64 event_mask = AMD64_RAW_EVENT_MASK_NB;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (pmu_version >= 2 && is_nb_event(event))
		event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache.  On family 16h and below,
@@ -221,7 +235,7 @@ static int amd_uncore_event_init(struct perf_event *event)
	 * out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
	 */
	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
	hwc->config = event->attr.config & event_mask;
	hwc->idx = -1;

	if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \

DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		   /* F17h+ L3 */
DEFINE_UNCORE_FORMAT_ATTR(umask,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	   /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	   /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	   /* F17h L3 */
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		   /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	   /* F19h L3 */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr, /* event14 if F17h+ */
	&format_attr_umask.attr,
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr, /* event8 if F17h+ */
	&format_attr_umask.attr,
	NULL, /* slicemask if F17h,	coreid if F19h */
	NULL, /* threadmask8 if F17h,	enallslices if F19h */
	NULL, /*			enallcores if F19h */
	NULL, /*			sliceid if F19h */
	NULL, /*			threadmask2 if F19h */
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static struct pmu amd_nb_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_l3_attr_groups,
	.attr_update	= amd_uncore_l3_attr_update,
	.name		= "amd_l2",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
			cpu_to_node(cpu));
}

static inline struct perf_event **
amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
{
	return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
			    cpu_to_node(cpu));
}

static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore_nb = NULL, *uncore_llc;
	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;

	if (amd_uncore_nb) {
		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
		uncore_nb = amd_uncore_alloc(cpu);
		if (!uncore_nb)
			goto fail;
@@ -384,11 +453,15 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
		uncore_nb->active_mask = &amd_nb_active_mask;
		uncore_nb->pmu = &amd_nb_pmu;
		uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
		if (!uncore_nb->events)
			goto fail;
		uncore_nb->id = -1;
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
	}

	if (amd_uncore_llc) {
		*per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
		uncore_llc = amd_uncore_alloc(cpu);
		if (!uncore_llc)
			goto fail;
@@ -398,6 +471,9 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
		uncore_llc->active_mask = &amd_llc_active_mask;
		uncore_llc->pmu = &amd_llc_pmu;
		uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
		if (!uncore_llc->events)
			goto fail;
		uncore_llc->id = -1;
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
	}
@@ -405,9 +481,16 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
	return 0;

fail:
	if (amd_uncore_nb)
		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
	if (uncore_nb) {
		kfree(uncore_nb->events);
		kfree(uncore_nb);
	}

	if (uncore_llc) {
		kfree(uncore_llc->events);
		kfree(uncore_llc);
	}

	return -ENOMEM;
}

@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
	if (cpu == uncore->cpu)
		cpumask_clear_cpu(cpu, uncore->active_mask);

	if (!--uncore->refcnt)
	if (!--uncore->refcnt) {
		kfree(uncore->events);
		kfree(uncore);
	}

	*per_cpu_ptr(uncores, cpu) = NULL;
}

@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	union cpuid_0x80000022_ebx ebx;
	int ret = -ENODEV;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@ static int __init amd_uncore_init(void)
	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	num_counters_nb	= NUM_COUNTERS_NB;
	num_counters_llc = NUM_COUNTERS_L2;
	if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@ static int __init amd_uncore_init(void)
	}

	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
		if (boot_cpu_data.x86 >= 0x17)
		if (pmu_version >= 2) {
			*df_attr++ = &format_attr_event14v2.attr;
			*df_attr++ = &format_attr_umask12.attr;
		} else if (boot_cpu_data.x86 >= 0x17) {
			*df_attr = &format_attr_event14.attr;
		}

		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@ static int __init amd_uncore_init(void)
		if (ret)
			goto fail_nb;

		if (pmu_version >= 2) {
			ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
			num_counters_nb = ebx.split.num_df_pmc;
		}

		pr_info("%d %s %s counters detected\n", num_counters_nb,
			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON" : "",
			amd_nb_pmu.name);
@@ -607,16 +706,11 @@ static int __init amd_uncore_init(void)
	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
		if (boot_cpu_data.x86 >= 0x19) {
			*l3_attr++ = &format_attr_event8.attr;
			*l3_attr++ = &format_attr_umask.attr;
			*l3_attr++ = &format_attr_coreid.attr;
			*l3_attr++ = &format_attr_enallslices.attr;
			*l3_attr++ = &format_attr_enallcores.attr;
			*l3_attr++ = &format_attr_sliceid.attr;
			*l3_attr++ = &format_attr_umask8.attr;
			*l3_attr++ = &format_attr_threadmask2.attr;
		} else if (boot_cpu_data.x86 >= 0x17) {
			*l3_attr++ = &format_attr_event8.attr;
			*l3_attr++ = &format_attr_umask.attr;
			*l3_attr++ = &format_attr_slicemask.attr;
			*l3_attr++ = &format_attr_umask8.attr;
			*l3_attr++ = &format_attr_threadmask8.attr;
		}

+4 −3
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
	struct event_constraint *c;

	c = intel_get_event_constraints(cpuc, idx, event);

	/*
	 * :ppp means to do reduced skid PEBS,
	 * which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
		return &counter0_constraint;
	}

	c = intel_get_event_constraints(cpuc, idx, event);

	return c;
}

@@ -6241,7 +6241,8 @@ __init int intel_pmu_init(void)
		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(false);
		intel_pmu_pebs_data_source_adl();
		x86_pmu.pebs_latency_data = adl_latency_data_small;
		x86_pmu.num_topdown_events = 8;
		x86_pmu.update_topdown_event = adl_update_topdown_event;
		x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
+86 −43
@@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void)
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
	data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
}

static void __init intel_pmu_pebs_data_source_grt(u64 *data_source)
{
	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
}

void __init intel_pmu_pebs_data_source_adl(void)
{
	u64 *data_source;

	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
	__intel_pmu_pebs_data_source_skl(false, data_source);

	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
	intel_pmu_pebs_data_source_grt(data_source);
}

static u64 precise_store_data(u64 status)
@@ -171,7 +196,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
	return dse.val;
}

static u64 load_latency_data(u64 status)
static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
{
	/*
	 * TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (tlb)
		*val |= P(TLB, MISS) | P(TLB, L2);
	else
		*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/* locked prefix */
	if (lock)
		*val |= P(LOCK, LOCKED);
}

/* Retrieve the latency data for e-core of ADL */
u64 adl_latency_data_small(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);

	dse.val = status;

	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

	/*
	 * For the atom core on ADL,
	 * bit 4: lock, bit 5: TLB access.
	 */
	pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);

	if (dse.ld_data_blk)
		val |= P(BLK, DATA);
	else
		val |= P(BLK, NA);

	return val;
}

static u64 load_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
@@ -181,7 +249,7 @@ static u64 load_latency_data(u64 status)
	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];
	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB, Lock infos
@@ -190,21 +258,8 @@ static u64 load_latency_data(u64 status)
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);
	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);

	/*
	 * Ice Lake and earlier models do not support block infos.
@@ -233,7 +288,7 @@ static u64 load_latency_data(u64 status)
	return val;
}

static u64 store_latency_data(u64 status)
static u64 store_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
@@ -243,23 +298,9 @@ static u64 store_latency_data(u64 status)
	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.st_lat_dse];
	val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.st_lat_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.st_lat_locked)
		val |= P(LOCK, LOCKED);
	pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);

	val |= P(BLK, NA);

@@ -781,8 +822,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {

struct event_constraint intel_grt_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
	INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
	EVENT_CONSTRAINT_END
};

@@ -1443,9 +1484,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
	bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
		val = load_latency_data(aux);
		val = load_latency_data(event, aux);
	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
		val = store_latency_data(aux);
		val = store_latency_data(event, aux);
	else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
		val = x86_pmu.pebs_latency_data(event, aux);
	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
		val = precise_datala_hsw(event, aux);
	else if (fst)
+16 −1
@@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
#define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */
#define PERF_X86_EVENT_PEBS_LAT_HYBRID	0x20000 /* ld and st lat for hybrid */

static inline bool is_topdown_count(struct perf_event *event)
{
@@ -136,7 +137,8 @@ struct amd_nb {
	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)
	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \
	PERF_SAMPLE_WEIGHT_TYPE)

#define PEBS_GP_REGS			\
	((1ULL << PERF_REG_X86_AX)    | \
@@ -460,6 +462,10 @@ struct cpu_hw_events {
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

#define INTEL_HYBRID_LAT_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)

/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -638,6 +644,8 @@ enum {
	x86_lbr_exclusive_max,
};

#define PERF_PEBS_DATA_SOURCE_MAX	0x10

struct x86_hybrid_pmu {
	struct pmu			pmu;
	const char			*name;
@@ -665,6 +673,8 @@ struct x86_hybrid_pmu {
	unsigned int			late_ack	:1,
					mid_ack		:1,
					enabled_ack	:1;

	u64				pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
};

static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -825,6 +835,7 @@ struct x86_pmu {
	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
	struct event_constraint *pebs_constraints;
	void		(*pebs_aliases)(struct perf_event *event);
	u64		(*pebs_latency_data)(struct perf_event *event, u64 status);
	unsigned long	large_pebs_flags;
	u64		rtm_abort_event;

@@ -1392,6 +1403,8 @@ void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

u64 adl_latency_data_small(struct perf_event *event, u64 status);

extern struct event_constraint intel_core2_pebs_event_constraints[];

extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1499,6 +1512,8 @@ void intel_pmu_pebs_data_source_nhm(void);

void intel_pmu_pebs_data_source_skl(bool pmem);

void intel_pmu_pebs_data_source_adl(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);
+10 −6
@@ -29,7 +29,10 @@ union ibs_fetch_ctl {
			rand_en:1,	/* 57: random tagging enable */
			fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
					 *      (needs IbsFetchComp) */
			reserved:5;	/* 59-63: reserved */
			l3_miss_only:1,	/* 59: Collect L3 miss samples only */
			fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
			fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
			reserved:2;	/* 62-63: reserved */
	};
};

@@ -38,14 +41,14 @@ union ibs_op_ctl {
	__u64 val;
	struct {
		__u64	opmaxcnt:16,	/* 0-15: periodic op max. count */
			reserved0:1,	/* 16: reserved */
			l3_miss_only:1,	/* 16: Collect L3 miss samples only */
			op_en:1,	/* 17: op sampling enable */
			op_val:1,	/* 18: op sample valid */
			cnt_ctl:1,	/* 19: periodic op counter control */
			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
			reserved1:5,	/* 27-31: reserved */
			reserved0:5,	/* 27-31: reserved */
			opcurcnt:27,	/* 32-58: periodic op counter current count */
			reserved2:5;	/* 59-63: reserved */
			reserved1:5;	/* 59-63: reserved */
	};
};

@@ -71,11 +74,12 @@ union ibs_op_data {
union ibs_op_data2 {
	__u64 val;
	struct {
		__u64	data_src:3,	/* 0-2: data source */
		__u64	data_src_lo:3,	/* 0-2: data source low */
			reserved0:1,	/* 3: reserved */
			rmt_node:1,	/* 4: destination node */
			cache_hit_st:1,	/* 5: cache hit state */
			reserved1:57;	/* 5-63: reserved */
			data_src_hi:2,	/* 6-7: data source high */
			reserved1:56;	/* 8-63: reserved */
	};
};
