Commit add76959 authored by Linus Torvalds
Browse files

Merge tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Thoroughly rewrite the data structures that implement perf task
   context handling, with the goal of fixing various quirks and
   unfeatures both in already merged, and in upcoming proposed code.

   The old data structure is the per task and per cpu
   perf_event_contexts:

         task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
              ^                                 |    ^     |           ^
              `---------------------------------'    |     `--> pmu ---'
                                                     v           ^
                                                perf_event ------'

   In this new design this is replaced with a single task context and a
   single CPU context, plus intermediate data-structures:

         task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
              ^                           |   ^ ^
              `---------------------------'   | |
                                              | |    perf_cpu_pmu_context <--.
                                              | `----.    ^                  |
                                              |      |    |                  |
                                              |      v    v                  |
                                              | ,--> perf_event_pmu_context  |
                                              | |                            |
                                              | |                            |
                                              v v                            |
                                         perf_event ---> pmu ----------------'

   [ See commit bd275681 for more details. ]

   This rewrite was developed by Peter Zijlstra and Ravi Bangoria.

 - Optimize perf_tp_event()

 - Update the Intel uncore PMU driver, extending it with UPI topology
   discovery on various hardware models.

 - Misc fixes & cleanups

* tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
  perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
  perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
  perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
  perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology()
  perf/x86/intel/uncore: Make set_mapping() procedure void
  perf/x86/intel/uncore: Update sysfs-devices-mapping file
  perf/x86/intel/uncore: Enable UPI topology discovery for Sapphire Rapids
  perf/x86/intel/uncore: Enable UPI topology discovery for Icelake Server
  perf/x86/intel/uncore: Get UPI NodeID and GroupID
  perf/x86/intel/uncore: Enable UPI topology discovery for Skylake Server
  perf/x86/intel/uncore: Generalize get_topology() for SKX PMUs
  perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D
  perf/x86/intel/uncore: Clear attr_update properly
  perf/x86/intel/uncore: Introduce UPI topology type
  perf/x86/intel/uncore: Generalize IIO topology support
  perf/core: Don't allow grouping events from different hw pmus
  perf/amd/ibs: Make IBS a core pmu
  perf: Fix function pointer case
  perf/x86/amd: Remove the repeated declaration
  perf: Fix possible memleak in pmu_dev_alloc()
  ...
parents 617fe4fa 17b8d847
Loading
Loading
Loading
Loading
+29 −1
Original line number Diff line number Diff line
What:           /sys/devices/uncore_iio_x/dieX
Date:           February 2020
Contact:        Roman Sudarikov <roman.sudarikov@linux.intel.com>
Contact:        Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
                Each IIO stack (PCIe root port) has its own IIO PMON block, so
                each dieX file (where X is die number) holds "Segment:Root Bus"
@@ -32,3 +32,31 @@ Description:
		    IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
		    IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
		    IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000

What:           /sys/devices/uncore_upi_X/dieY
Date:           March 2022
Contact:        Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
                Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
                value that means UPI link number X on die Y is connected to UPI
                link Z on die W and this link between sockets can be monitored
                by UPI PMON block.
                For example, 4-die Sapphire Rapids platform has the following
                UPI 0 topology::

		    # tail /sys/devices/uncore_upi_0/die*
		    ==> /sys/devices/uncore_upi_0/die0 <==
		    upi_1,die_1
		    ==> /sys/devices/uncore_upi_0/die1 <==
		    upi_0,die_3
		    ==> /sys/devices/uncore_upi_0/die2 <==
		    upi_1,die_3
		    ==> /sys/devices/uncore_upi_0/die3 <==
		    upi_0,die_1

                Which means::

		    UPI link 0 on die 0 is connected to UPI link 1 on die 1
		    UPI link 0 on die 1 is connected to UPI link 0 on die 3
		    UPI link 0 on die 2 is connected to UPI link 1 on die 3
		    UPI link 0 on die 3 is connected to UPI link 0 on die 1
 No newline at end of file
+11 −7
Original line number Diff line number Diff line
@@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event)

static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
	struct perf_event_context *task_ctx =
		this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx;
	struct perf_event_context *ctx;
	int nr_user = 0;

	if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user)
	ctx = perf_cpu_task_ctx();
	if (ctx)
		nr_user = ctx->nr_user;

	if (sysctl_perf_user_access && nr_user)
		armv8pmu_enable_user_access(cpu_pmu);
	else
		armv8pmu_disable_user_access();
@@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
	return 0;
}

static int armv8pmu_filter_match(struct perf_event *event)
static bool armv8pmu_filter(struct pmu *pmu, int cpu)
{
	unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
	return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
	return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
}

static void armv8pmu_reset(void *info)
@@ -1254,7 +1258,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
	cpu_pmu->stop			= armv8pmu_stop;
	cpu_pmu->reset			= armv8pmu_reset;
	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
	cpu_pmu->filter_match		= armv8pmu_filter_match;
	cpu_pmu->filter			= armv8pmu_filter;

	cpu_pmu->pmu.event_idx		= armv8pmu_user_event_idx;

+4 −4
Original line number Diff line number Diff line
@@ -132,7 +132,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)

static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -424,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
		cpuhw->bhrb_context = event->ctx;
	}
	cpuhw->bhrb_users++;
	perf_sched_cb_inc(event->ctx->pmu);
	perf_sched_cb_inc(event->pmu);
}

static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -436,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)

	WARN_ON_ONCE(!cpuhw->bhrb_users);
	cpuhw->bhrb_users--;
	perf_sched_cb_dec(event->ctx->pmu);
	perf_sched_cb_dec(event->pmu);

	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
		/* BHRB cannot be turned off when other
@@ -451,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
 * mingle with the other process's entries during context switch.
 */
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	if (!ppmu->bhrb_nr)
		return;
+1 −1
Original line number Diff line number Diff line
@@ -377,7 +377,7 @@ static int paicrypt_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
 * but for sampling only event CRYPTO_ALL is allowed.
 */
static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	/* We started with a clean page on event installation. So read out
	 * results on schedule_out and if page was dirty, clear values.
+1 −1
Original line number Diff line number Diff line
@@ -466,7 +466,7 @@ static int paiext_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
 * but for sampling only event NNPA_ALL is allowed.
 */
static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	/* We started with a clean page on event installation. So read out
	 * results on schedule_out and if page was dirty, clear values.
Loading