Commit 91e1c99e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Thomas Gleixner:
 "Core:

   - Allow ftrace to instrument parts of the perf core code

   - Add a new mem_hops field to perf_mem_data_src which allows to
     represent intra-node/package or inter-node/off-package details to
     prepare for next generation systems which have more hieararchy
     within the node/pacakge level.

  Tools:

   - Update for the new mem_hops field in perf_mem_data_src

  Arch:

   - A set of constraints fixes for the Intel uncore PMU

   - The usual set of small fixes and improvements for x86 and PPC"

* tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings
  powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses
  tools/perf: Add mem_hops field in perf_mem_data_src structure
  perf: Add mem_hops field in perf_mem_data_src structure
  perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line
  perf/core: Allow ftrace for functions in kernel/event/core.c
  perf/x86: Add new event for AUX output counter index
  perf/x86: Add compiler barrier after updating BTS
  perf/x86/intel/uncore: Fix Intel SPR M3UPI event constraints
  perf/x86/intel/uncore: Fix Intel SPR M2PCIE event constraints
  perf/x86/intel/uncore: Fix Intel SPR IIO event constraints
  perf/x86/intel/uncore: Fix Intel SPR CHA event constraints
  perf/x86/intel/uncore: Fix Intel ICX IIO event constraints
  perf/x86/intel/uncore: Fix invalid unit check
  perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
parents 5a47ebe9 2de71ee1
Loading
Loading
Loading
Loading
+21 −5
Original line number Diff line number Diff line
@@ -238,11 +238,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
		ret |= P(SNOOP, HIT);
		break;
	case 5:
		ret = PH(LVL, REM_CCE1);
		if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
			ret |= P(SNOOP, HIT);
		else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
			ret |= P(SNOOP, HITM);
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			ret = REM | P(HOPS, 0);

			if (sub_idx == 0 || sub_idx == 4)
				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
			else if (sub_idx == 1 || sub_idx == 5)
				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
			else if (sub_idx == 2 || sub_idx == 6)
				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
			else if (sub_idx == 3 || sub_idx == 7)
				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
		} else {
			if (sub_idx == 0)
				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
			else if (sub_idx == 1)
				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
			else if (sub_idx == 2 || sub_idx == 4)
				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
			else if (sub_idx == 3 || sub_idx == 5)
				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
		}
		break;
	case 6:
		ret = PH(LVL, REM_CCE2);
+2 −0
Original line number Diff line number Diff line
@@ -273,6 +273,8 @@
#define P(a, b)				PERF_MEM_S(a, b)
#define PH(a, b)			(P(LVL, HIT) | P(a, b))
#define PM(a, b)			(P(LVL, MISS) | P(a, b))
#define LEVEL(x)			P(LVLNUM, x)
#define REM				P(REMOTE, REMOTE)

int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
int isa207_compute_mmcr(u64 event[], int n_ev,
+6 −0
Original line number Diff line number Diff line
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable,	     *x86_pmu.enable);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable,     *x86_pmu.disable);

DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);

DEFINE_STATIC_CALL_NULL(x86_pmu_add,  *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del,  *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
	hwc->last_cpu = smp_processor_id();
	hwc->last_tag = ++cpuc->tags[i];

	static_call_cond(x86_pmu_assign)(event, idx);

	switch (hwc->idx) {
	case INTEL_PMC_IDX_FIXED_BTS:
	case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
	static_call_update(x86_pmu_enable, x86_pmu.enable);
	static_call_update(x86_pmu_disable, x86_pmu.disable);

	static_call_update(x86_pmu_assign, x86_pmu.assign);

	static_call_update(x86_pmu_add, x86_pmu.add);
	static_call_update(x86_pmu_del, x86_pmu.del);
	static_call_update(x86_pmu_read, x86_pmu.read);
+6 −0
Original line number Diff line number Diff line
@@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts)
	} else {
		local_set(&buf->data_size, head);
	}

	/*
	 * Since BTS is coherent, just add compiler barrier to ensure
	 * BTS updating is ordered against bts::handle::event.
	 */
	barrier();
}

static int
+19 −2
Original line number Diff line number Diff line
@@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {

static struct event_constraint intel_icl_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* old INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
@@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {

static struct event_constraint intel_spr_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
@@ -2403,6 +2404,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
		intel_pmu_pebs_disable(event);
}

static void intel_pmu_assign_event(struct perf_event *event, int idx)
{
	if (is_pebs_pt(event))
		perf_report_aux_output_id(event, idx);
}

static void intel_pmu_del_event(struct perf_event *event)
{
	if (needs_branch_stack(event))
@@ -4495,8 +4502,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}

static void intel_aux_output_init(void)
{
	/* Refer also intel_pmu_aux_output_match() */
	if (x86_pmu.intel_cap.pebs_output_pt_available)
		x86_pmu.assign = intel_pmu_assign_event;
}

static int intel_pmu_aux_output_match(struct perf_event *event)
{
	/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
	if (!x86_pmu.intel_cap.pebs_output_pt_available)
		return 0;

@@ -6302,6 +6317,8 @@ __init int intel_pmu_init(void)
	if (is_hybrid())
		intel_pmu_check_hybrid_pmus((u64)fixed_mask);

	intel_aux_output_init();

	return 0;
}

Loading