Commit d310ec03 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance event updates from Ingo Molnar:

 - Add CPU-PMU support for Intel Sapphire Rapids CPUs

 - Extend the perf ABI with PERF_SAMPLE_WEIGHT_STRUCT, to offer
   two-parameter sampling event feedback. Not used yet, but is intended
   for Golden Cove CPU-PMU, which can provide both the instruction
   latency and the cache latency information for memory profiling
   events.

 - Remove experimental, default-disabled perfmon-v4 counter_freezing
   support that could only be enabled via a boot option. The hardware is
   hopelessly broken, we'd like to make sure nobody starts relying on
   this, as it would only end in tears.

 - Fix energy/power events on Intel SPR platforms

 - Simplify the uprobes resume_execution() logic

 - Misc smaller fixes.

* tag 'perf-core-2021-02-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/rapl: Fix psys-energy event on Intel SPR platform
  perf/x86/rapl: Only check lower 32bits for RAPL energy counters
  perf/x86/rapl: Add msr mask support
  perf/x86/kvm: Add Cascade Lake Xeon steppings to isolation_ucodes[]
  perf/x86/intel: Support CPUID 10.ECX to disable fixed counters
  perf/x86/intel: Add perf core PMU support for Sapphire Rapids
  perf/x86/intel: Filter unsupported Topdown metrics event
  perf/x86/intel: Factor out intel_update_topdown_event()
  perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT
  perf/intel: Remove Perfmon-v4 counter_freezing support
  x86/perf: Use static_call for x86_pmu.guest_get_msrs
  perf/x86/intel/uncore: With > 8 nodes, get pci bus die id from NUMA info
  perf/x86/intel/uncore: Store the logical die id instead of the physical die id.
  x86/kprobes: Do not decode opcode in resume_execution()
parents 657bd90c 8bcfdd7c
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -945,12 +945,6 @@
			causing system reset or hang due to sending
			INIT from AP to BSP.

	perf_v4_pmi=	[X86,INTEL]
			Format: <bool>
			Disable Intel PMU counter freezing feature.
			The feature only exists starting from
			Arch Perfmon v4 (Skylake and newer).

	disable_ddw	[PPC/PSERIES]
			Disable Dynamic DMA Window support. Use this
			to workaround buggy firmware.
+1 −1
Original line number Diff line number Diff line
@@ -2195,7 +2195,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,

		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
						ppmu->get_mem_weight)
			ppmu->get_mem_weight(&data.weight);
			ppmu->get_mem_weight(&data.weight.full);

		if (perf_event_overflow(event, &data, regs))
			power_pmu_stop(event, 0);
+27 −1
Original line number Diff line number Diff line
@@ -81,6 +81,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs,   *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);

DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs,  *x86_pmu.guest_get_msrs);

u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -253,6 +255,8 @@ static bool check_hw_exists(void)
		if (ret)
			goto msr_fail;
		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
			if (fixed_counter_disabled(i))
				continue;
			if (val & (0x03 << i*4)) {
				bios_fail = 1;
				val_fail = val;
@@ -665,6 +669,12 @@ void x86_pmu_disable_all(void)
	}
}

struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
	return static_call(x86_pmu_guest_get_msrs)(nr);
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);

/*
 * There may be PMI landing after enabled=0. The PMI hitting could be before or
 * after disable_all.
@@ -1523,6 +1533,8 @@ void perf_event_print_debug(void)
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		if (fixed_counter_disabled(idx))
			continue;
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1923,6 +1935,8 @@ static void x86_pmu_static_call_update(void)

	static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
	static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);

	static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
}

static void _x86_pmu_read(struct perf_event *event)
@@ -1930,6 +1944,13 @@ static void _x86_pmu_read(struct perf_event *event)
	x86_perf_event_update(event);
}

static inline struct perf_guest_switch_msr *
perf_guest_get_msrs_nop(int *nr)
{
	*nr = 0;
	return NULL;
}

static int __init init_hw_perf_events(void)
{
	struct x86_pmu_quirk *quirk;
@@ -1995,12 +2016,17 @@ static int __init init_hw_perf_events(void)
	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
	pr_info("... fixed-purpose events:   %lu\n",
			hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1)
					<< INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl));
	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);

	if (!x86_pmu.read)
		x86_pmu.read = _x86_pmu_read;

	if (!x86_pmu.guest_get_msrs)
		x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;

	x86_pmu_static_call_update();

	/*
+353 −194
Original line number Diff line number Diff line
@@ -275,6 +275,55 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
	EVENT_EXTRA_END
};

static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
	EVENT_EXTRA_END
};

static struct event_constraint intel_spr_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),

	INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
	INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
	/*
	 * Generally event codes < 0x90 are restricted to counters 0-3.
	 * The 0x2E and 0x3C are exception, which has no restriction.
	 */
	INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),

	INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
	INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
	INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
	INTEL_EVENT_CONSTRAINT(0xce, 0x1),
	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
	/*
	 * Generally event codes >= 0x90 are likely to have no restrictions.
	 * The exception are defined as above.
	 */
	INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),

	EVENT_CONSTRAINT_END
};


EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
@@ -319,6 +368,10 @@ EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
EVENT_ATTR_STR(topdown-bad-spec,	td_bad_spec,		"event=0x00,umask=0x81");
EVENT_ATTR_STR(topdown-fe-bound,	td_fe_bound,		"event=0x00,umask=0x82");
EVENT_ATTR_STR(topdown-be-bound,	td_be_bound,		"event=0x00,umask=0x83");
EVENT_ATTR_STR(topdown-heavy-ops,	td_heavy_ops,		"event=0x00,umask=0x84");
EVENT_ATTR_STR(topdown-br-mispredict,	td_br_mispredict,	"event=0x00,umask=0x85");
EVENT_ATTR_STR(topdown-fetch-lat,	td_fetch_lat,		"event=0x00,umask=0x86");
EVENT_ATTR_STR(topdown-mem-bound,	td_mem_bound,		"event=0x00,umask=0x87");

static struct attribute *snb_events_attrs[] = {
	EVENT_PTR(td_slots_issued),
@@ -384,6 +437,108 @@ static u64 intel_pmu_event_map(int hw_event)
	return intel_perfmon_event_map[hw_event];
}

static __initconst const u64 spr_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,
		[ C(RESULT_MISS)   ] = 0xe124,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_MISS)   ] = 0xe424,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,
		[ C(RESULT_MISS)   ] = 0xe12,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,
		[ C(RESULT_MISS)   ] = 0xe13,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = 0xe11,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4c4,
		[ C(RESULT_MISS)   ] = 0x4c5,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x12a,
		[ C(RESULT_MISS)   ] = 0x12a,
	},
 },
};

static __initconst const u64 spr_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x10001,
		[ C(RESULT_MISS)   ] = 0x3fbfc00001,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
		[ C(RESULT_MISS)   ] = 0x3f3fc00002,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x10c000001,
		[ C(RESULT_MISS)   ] = 0x3fb3000001,
	},
 },
};

/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
@@ -2134,18 +2289,6 @@ static void intel_tfa_pmu_enable_all(int added)
	intel_pmu_enable_all(added);
}

static void enable_counter_freeze(void)
{
	update_debugctlmsr(get_debugctlmsr() |
			DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}

static void disable_counter_freeze(void)
{
	update_debugctlmsr(get_debugctlmsr() &
			~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}

static inline u64 intel_pmu_get_status(void)
{
	u64 status;
@@ -2337,8 +2480,8 @@ static void __icl_update_topdown_event(struct perf_event *event,
	}
}

static void update_saved_topdown_regs(struct perf_event *event,
				      u64 slots, u64 metrics)
static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
				      u64 metrics, int metric_end)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *other;
@@ -2347,7 +2490,7 @@ static void update_saved_topdown_regs(struct perf_event *event,
	event->hw.saved_slots = slots;
	event->hw.saved_metric = metrics;

	for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
		if (!is_topdown_idx(idx))
			continue;
		other = cpuc->events[idx];
@@ -2362,7 +2505,8 @@ static void update_saved_topdown_regs(struct perf_event *event,
 * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
 * modify by a NMI. PMU has to be disabled before calling this function.
 */
static u64 icl_update_topdown_event(struct perf_event *event)

static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *other;
@@ -2378,7 +2522,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
	/* read PERF_METRICS */
	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);

	for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
		if (!is_topdown_idx(idx))
			continue;
		other = cpuc->events[idx];
@@ -2404,7 +2548,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
		 * Don't need to reset the PERF_METRICS and Fixed counter 3.
		 * Because the values will be restored in next schedule in.
		 */
		update_saved_topdown_regs(event, slots, metrics);
		update_saved_topdown_regs(event, slots, metrics, metric_end);
		reset = false;
	}

@@ -2413,12 +2557,18 @@ static u64 icl_update_topdown_event(struct perf_event *event)
		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
		wrmsrl(MSR_PERF_METRICS, 0);
		if (event)
			update_saved_topdown_regs(event, 0, 0);
			update_saved_topdown_regs(event, 0, 0, metric_end);
	}

	return slots;
}

static u64 icl_update_topdown_event(struct perf_event *event)
{
	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
						 x86_pmu.num_topdown_events - 1);
}

static void intel_pmu_read_topdown_event(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2573,8 +2723,11 @@ static void intel_pmu_reset(void)
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		if (fixed_counter_disabled(idx))
			continue;
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
	}

	if (ds)
		ds->bts_index = ds->bts_buffer_base;
@@ -2709,95 +2862,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
	return handled;
}

static bool disable_counter_freezing = true;
static int __init intel_perf_counter_freezing_setup(char *s)
{
	bool res;

	if (kstrtobool(s, &res))
		return -EINVAL;

	disable_counter_freezing = !res;
	return 1;
}
__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);

/*
 * Simplified handler for Arch Perfmon v4:
 * - We rely on counter freezing/unfreezing to enable/disable the PMU.
 * This is done automatically on PMU ack.
 * - Ack the PMU only after the APIC.
 */

static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int handled = 0;
	bool bts = false;
	u64 status;
	int pmu_enabled = cpuc->enabled;
	int loops = 0;

	/* PMU has been disabled because of counter freezing */
	cpuc->enabled = 0;
	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		bts = true;
		intel_bts_disable_local();
		handled = intel_pmu_drain_bts_buffer();
		handled += intel_bts_interrupt();
	}
	status = intel_pmu_get_status();
	if (!status)
		goto done;
again:
	intel_pmu_lbr_read();
	if (++loops > 100) {
		static bool warned;

		if (!warned) {
			WARN(1, "perfevents: irq loop stuck!\n");
			perf_event_print_debug();
			warned = true;
		}
		intel_pmu_reset();
		goto done;
	}


	handled += handle_pmi_common(regs, status);
done:
	/* Ack the PMI in the APIC */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/*
	 * The counters start counting immediately while ack the status.
	 * Make it as close as possible to IRET. This avoids bogus
	 * freezing on Skylake CPUs.
	 */
	if (status) {
		intel_pmu_ack_status(status);
	} else {
		/*
		 * CPU may issues two PMIs very close to each other.
		 * When the PMI handler services the first one, the
		 * GLOBAL_STATUS is already updated to reflect both.
		 * When it IRETs, the second PMI is immediately
		 * handled and it sees clear status. At the meantime,
		 * there may be a third PMI, because the freezing bit
		 * isn't set since the ack in first PMI handlers.
		 * Double check if there is more work to be done.
		 */
		status = intel_pmu_get_status();
		if (status)
			goto again;
	}

	if (bts)
		intel_bts_enable_local();
	cpuc->enabled = pmu_enabled;
	return handled;
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
@@ -3563,6 +3627,26 @@ static int core_pmu_hw_config(struct perf_event *event)
	return intel_pmu_bts_config(event);
}

#define INTEL_TD_METRIC_AVAILABLE_MAX	(INTEL_TD_METRIC_RETIRING + \
					 ((x86_pmu.num_topdown_events - 1) << 8))

static bool is_available_metric_event(struct perf_event *event)
{
	return is_metric_event(event) &&
		event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
}

static inline bool is_mem_loads_event(struct perf_event *event)
{
	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
}

static inline bool is_mem_loads_aux_event(struct perf_event *event)
{
	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
}


static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);
@@ -3636,7 +3720,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
		if (event->attr.config & X86_ALL_EVENT_FLAGS)
			return -EINVAL;

		if (is_metric_event(event)) {
		if (is_available_metric_event(event)) {
			struct perf_event *leader = event->group_leader;

			/* The metric events don't support sampling. */
@@ -3665,6 +3749,33 @@ static int intel_pmu_hw_config(struct perf_event *event)
		}
	}

	/*
	 * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
	 * doesn't function quite right. As a work-around it needs to always be
	 * co-scheduled with a auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
	 * The actual count of this second event is irrelevant it just needs
	 * to be active to make the first event function correctly.
	 *
	 * In a group, the auxiliary event must be in front of the load latency
	 * event. The rule is to simplify the implementation of the check.
	 * That's because perf cannot have a complete group at the moment.
	 */
	if (x86_pmu.flags & PMU_FL_MEM_LOADS_AUX &&
	    (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
	    is_mem_loads_event(event)) {
		struct perf_event *leader = event->group_leader;
		struct perf_event *sibling = NULL;

		if (!is_mem_loads_aux_event(leader)) {
			for_each_sibling_event(sibling, leader) {
				if (is_mem_loads_aux_event(sibling))
					break;
			}
			if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
				return -ENODATA;
		}
	}

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;

@@ -3680,26 +3791,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
	return 0;
}

#ifdef CONFIG_RETPOLINE
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
#endif

struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
#ifdef CONFIG_RETPOLINE
	if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
		return intel_guest_get_msrs(nr);
	else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
		return core_guest_get_msrs(nr);
#endif
	if (x86_pmu.guest_get_msrs)
		return x86_pmu.guest_get_msrs(nr);
	*nr = 0;
	return NULL;
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);

static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -3864,6 +3955,29 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	return hsw_get_event_constraints(cpuc, idx, event);
}

static struct event_constraint *
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct event_constraint *c;

	c = icl_get_event_constraints(cpuc, idx, event);

	/*
	 * The :ppp indicates the Precise Distribution (PDist) facility, which
	 * is only supported on the GP counter 0. If a :ppp event which is not
	 * available on the GP counter 0, error out.
	 */
	if (event->attr.precise_ip == 3) {
		if (c->idxmsk64 & BIT_ULL(0))
			return &counter0_constraint;

		return &emptyconstraint;
	}

	return c;
}

static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
@@ -3953,6 +4067,14 @@ static u64 nhm_limit_period(struct perf_event *event, u64 left)
	return max(left, 32ULL);
}

static u64 spr_limit_period(struct perf_event *event, u64 left)
{
	if (event->attr.precise_ip == 3)
		return max(left, 128ULL);

	return left;
}

PMU_FORMAT_ATTR(event,	"config:0-7"	);
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -4094,9 +4216,6 @@ static void intel_pmu_cpu_starting(int cpu)
	if (x86_pmu.version > 1)
		flip_smm_bit(&x86_pmu.attr_freeze_on_smi);

	if (x86_pmu.counter_freezing)
		enable_counter_freeze();

	/* Disable perf metrics if any added CPU doesn't support it. */
	if (x86_pmu.intel_cap.perf_metrics) {
		union perf_capabilities perf_cap;
@@ -4167,9 +4286,6 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
static void intel_pmu_cpu_dying(int cpu)
{
	fini_debug_store_on_cpu(cpu);

	if (x86_pmu.counter_freezing)
		disable_counter_freeze();
}

void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@@ -4397,6 +4513,9 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X,		 2, 0x0b000014),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 3, 0x00000021),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 4, 0x00000000),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 5, 0x00000000),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 6, 0x00000000),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 7, 0x00000000),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L,		 3, 0x0000007c),
	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE,		 3, 0x0000007c),
	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		 9, 0x0000004e),
@@ -4561,39 +4680,6 @@ static __init void intel_nehalem_quirk(void)
	}
}

static const struct x86_cpu_desc counter_freezing_ucodes[] = {
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,	 2, 0x0000000e),
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,	 9, 0x0000002e),
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,	10, 0x00000008),
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D,	 1, 0x00000028),
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,	 1, 0x00000028),
	INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,	 8, 0x00000006),
	{}
};

static bool intel_counter_freezing_broken(void)
{
	return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
}

static __init void intel_counter_freezing_quirk(void)
{
	/* Check if it's already disabled */
	if (disable_counter_freezing)
		return;

	/*
	 * If the system starts with the wrong ucode, leave the
	 * counter-freezing feature permanently disabled.
	 */
	if (intel_counter_freezing_broken()) {
		pr_info("PMU counter freezing disabled due to CPU errata,"
			"please upgrade microcode\n");
		x86_pmu.counter_freezing = false;
		x86_pmu.handle_irq = intel_pmu_handle_irq;
	}
}

/*
 * enable software workaround for errata:
 * SNB: BJ122
@@ -4703,6 +4789,42 @@ static struct attribute *icl_tsx_events_attrs[] = {
	NULL,
};


EVENT_ATTR_STR(mem-stores,	mem_st_spr,	"event=0xcd,umask=0x2");
EVENT_ATTR_STR(mem-loads-aux,	mem_ld_aux,	"event=0x03,umask=0x82");

static struct attribute *spr_events_attrs[] = {
	EVENT_PTR(mem_ld_hsw),
	EVENT_PTR(mem_st_spr),
	EVENT_PTR(mem_ld_aux),
	NULL,
};

static struct attribute *spr_td_events_attrs[] = {
	EVENT_PTR(slots),
	EVENT_PTR(td_retiring),
	EVENT_PTR(td_bad_spec),
	EVENT_PTR(td_fe_bound),
	EVENT_PTR(td_be_bound),
	EVENT_PTR(td_heavy_ops),
	EVENT_PTR(td_br_mispredict),
	EVENT_PTR(td_fetch_lat),
	EVENT_PTR(td_mem_bound),
	NULL,
};

static struct attribute *spr_tsx_events_attrs[] = {
	EVENT_PTR(tx_start),
	EVENT_PTR(tx_abort),
	EVENT_PTR(tx_commit),
	EVENT_PTR(tx_capacity_read),
	EVENT_PTR(tx_capacity_write),
	EVENT_PTR(tx_conflict),
	EVENT_PTR(cycles_t),
	EVENT_PTR(cycles_ct),
	NULL,
};

static ssize_t freeze_on_smi_show(struct device *cdev,
				  struct device_attribute *attr,
				  char *buf)
@@ -4926,7 +5048,7 @@ __init int intel_pmu_init(void)
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	struct event_constraint *c;
	unsigned int unused;
	unsigned int fixed_mask;
	struct extra_reg *er;
	bool pmem = false;
	int version, i;
@@ -4948,7 +5070,7 @@ __init int intel_pmu_init(void)
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
	cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
		return -ENODEV;

@@ -4972,15 +5094,15 @@ __init int intel_pmu_init(void)
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events, when not running in a hypervisor:
	 */
	if (version > 1) {
	if (version > 1 && version < 5) {
		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);

		x86_pmu.num_counters_fixed =
			max((int)edx.split.num_counters_fixed, assume);
	}

	if (version >= 4)
		x86_pmu.counter_freezing = !disable_counter_freezing;
		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
	} else if (version >= 5)
		x86_pmu.num_counters_fixed = fls(fixed_mask);

	if (boot_cpu_has(X86_FEATURE_PDCM)) {
		u64 capabilities;
@@ -5109,7 +5231,6 @@ __init int intel_pmu_init(void)

	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_D:
		x86_add_quirk(intel_counter_freezing_quirk);
		memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -5136,7 +5257,6 @@ __init int intel_pmu_init(void)
		break;

	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		x86_add_quirk(intel_counter_freezing_quirk);
		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
@@ -5483,12 +5603,50 @@ __init int intel_pmu_init(void)
		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(pmem);
		x86_pmu.num_topdown_events = 4;
		x86_pmu.update_topdown_event = icl_update_topdown_event;
		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
		pr_cont("Icelake events, ");
		name = "icelake";
		break;

	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		pmem = true;
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		x86_pmu.event_constraints = intel_spr_event_constraints;
		x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
		x86_pmu.extra_regs = intel_spr_extra_regs;
		x86_pmu.limit_period = spr_limit_period;
		x86_pmu.pebs_aliases = NULL;
		x86_pmu.pebs_prec_dist = true;
		x86_pmu.pebs_block = true;
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
		x86_pmu.flags |= PMU_FL_PEBS_ALL;
		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = spr_get_event_constraints;
		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
			hsw_format_attr : nhm_format_attr;
		extra_skl_attr = skl_format_attr;
		mem_attr = spr_events_attrs;
		td_attr = spr_td_events_attrs;
		tsx_attr = spr_tsx_events_attrs;
		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(pmem);
		x86_pmu.num_topdown_events = 8;
		x86_pmu.update_topdown_event = icl_update_topdown_event;
		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
		pr_cont("Sapphire Rapids events, ");
		name = "sapphire_rapids";
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
@@ -5531,8 +5689,7 @@ __init int intel_pmu_init(void)
		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
	x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED;

	/* AnyThread may be deprecated on arch perfmon v5 or later */
	if (x86_pmu.intel_cap.anythread_deprecated)
@@ -5549,12 +5706,21 @@ __init int intel_pmu_init(void)
			 * events to the generic counters.
			 */
			if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
				/*
				 * Disable topdown slots and metrics events,
				 * if slots event is not in CPUID.
				 */
				if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl))
					c->idxmsk64 = 0;
				c->weight = hweight64(c->idxmsk64);
				continue;
			}

			if (c->cmask == FIXED_EVENT_FLAGS
			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
			if (c->cmask == FIXED_EVENT_FLAGS) {
				/* Disabled fixed counters which are not in CPUID */
				c->idxmsk64 &= x86_pmu.intel_ctrl;

				if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES)
					c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			}
			c->idxmsk64 &=
@@ -5601,13 +5767,6 @@ __init int intel_pmu_init(void)
		pr_cont("full-width counters, ");
	}

	/*
	 * For arch perfmon 4 use counter freezing to avoid
	 * several MSR accesses in the PMI.
	 */
	if (x86_pmu.counter_freezing)
		x86_pmu.handle_irq = intel_pmu_handle_irq_v4;

	if (x86_pmu.intel_cap.perf_metrics)
		x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;

+121 −10

File changed.

Preview size limit exceeded, changes collapsed.

Loading