Commit 9d6a1df9 authored by Ian Rogers, committed by Arnaldo Carvalho de Melo
Browse files

perf pmus: Allow just core PMU scanning



Scanning all PMUs is expensive because every PMU's sysfs entries are
loaded; benchmarking shows more than 4x the cost of scanning only core PMUs:

```
$ perf bench internals pmu-scan -i 1000
Computing performance of sysfs PMU event scan for 1000 times
  Average core PMU scanning took: 989.231 usec (+- 1.535 usec)
  Average PMU scanning took: 4309.425 usec (+- 74.322 usec)
```

Add a new perf_pmus__scan_core() routine that scans just core
PMUs. Replace perf_pmus__scan() calls with perf_pmus__scan_core() where
non-core PMUs are being ignored.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ali Saidi <alisaidi@amazon.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jing Zhang <renyu.zj@linux.alibaba.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kang Minchul <tegongkang@gmail.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Ming Wang <wangming01@loongson.cn>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230527072210.2900565-30-irogers@google.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 15c57a80
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -11,10 +11,7 @@ static struct perf_pmu *pmu__find_core_pmu(void)
{
	struct perf_pmu *pmu = NULL;

	while ((pmu = perf_pmus__scan(pmu))) {
		if (!is_pmu_core(pmu->name))
			continue;

	while ((pmu = perf_pmus__scan_core(pmu))) {
		/*
		 * The cpumap should cover all CPUs. Otherwise, some CPUs may
		 * not support some events or have different event IDs.
+1 −4
Original line number Diff line number Diff line
@@ -33,13 +33,10 @@ static int ___evlist__add_default_attrs(struct evlist *evlist,
			continue;
		}

		while ((pmu = perf_pmus__scan(pmu)) != NULL) {
		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
			struct perf_cpu_map *cpus;
			struct evsel *evsel;

			if (!pmu->is_core)
				continue;

			evsel = evsel__new(attrs + i);
			if (evsel == NULL)
				goto out_delete_partial_list;
+3 −5
Original line number Diff line number Diff line
@@ -300,12 +300,10 @@ uint64_t arch__intr_reg_mask(void)
		 * The same register set is supported among different hybrid PMUs.
		 * Only check the first available one.
		 */
		while ((pmu = perf_pmus__scan(pmu)) != NULL) {
			if (pmu->is_core) {
		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
			type = pmu->type;
			break;
		}
		}
		attr.config |= type << PERF_PMU_TYPE_SHIFT;
	}

+29 −21
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ struct pmu_scan_result {
	int nr_aliases;
	int nr_formats;
	int nr_caps;
	bool is_core;
};

static const struct option options[] = {
@@ -53,6 +54,7 @@ static int save_result(void)
		r = results + nr_pmus;

		r->name = strdup(pmu->name);
		r->is_core = pmu->is_core;
		r->nr_caps = pmu->nr_caps;

		r->nr_aliases = 0;
@@ -72,7 +74,7 @@ static int save_result(void)
	return 0;
}

static int check_result(void)
static int check_result(bool core_only)
{
	struct pmu_scan_result *r;
	struct perf_pmu *pmu;
@@ -81,6 +83,9 @@ static int check_result(void)

	for (int i = 0; i < nr_pmus; i++) {
		r = &results[i];
		if (core_only && !r->is_core)
			continue;

		pmu = perf_pmus__find(r->name);
		if (pmu == NULL) {
			pr_err("Cannot find PMU %s\n", r->name);
@@ -130,7 +135,6 @@ static int run_pmu_scan(void)
	struct timeval start, end, diff;
	double time_average, time_stddev;
	u64 runtime_us;
	unsigned int i;
	int ret;

	init_stats(&stats);
@@ -142,26 +146,30 @@ static int run_pmu_scan(void)
		return -1;
	}

	for (i = 0; i < iterations; i++) {
	for (int j = 0; j < 2; j++) {
		bool core_only = (j == 0);

		for (unsigned int i = 0; i < iterations; i++) {
			gettimeofday(&start, NULL);
			if (core_only)
				perf_pmus__scan_core(NULL);
			else
				perf_pmus__scan(NULL);
			gettimeofday(&end, NULL);

			timersub(&end, &start, &diff);
			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
			update_stats(&stats, runtime_us);

		ret = check_result();
			ret = check_result(core_only);
			perf_pmus__destroy();
			if (ret < 0)
				break;
		}

		time_average = avg_stats(&stats);
		time_stddev = stddev_stats(&stats);
	pr_info("  Average PMU scanning took: %.3f usec (+- %.3f usec)\n",
		time_average, time_stddev);

		pr_info("  Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n",
			core_only ? " core" : "", time_average, time_stddev);
	}
	delete_result();
	return 0;
}
+1 −4
Original line number Diff line number Diff line
@@ -709,12 +709,9 @@ static int test__aliases(struct test_suite *test __maybe_unused,
	struct perf_pmu *pmu = NULL;
	unsigned long i;

	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
		int count = 0;

		if (!is_pmu_core(pmu->name))
			continue;

		if (list_empty(&pmu->format)) {
			pr_debug2("skipping testing core PMU %s\n", pmu->name);
			continue;
Loading