Commit 8636df94 authored by Linus Torvalds
Browse files

Merge tag 'perf-tools-for-v6.1-2-2022-10-16' of...

Merge tag 'perf-tools-for-v6.1-2-2022-10-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Use BPF CO-RE (Compile Once, Run Everywhere) to support old kernels
   when using bperf (perf BPF based counters) with cgroups.

 - Support HiSilicon PCIe Performance Monitoring Unit (PMU), that
   monitors bandwidth, latency, bus utilization and buffer occupancy.

   Documented in Documentation/admin-guide/perf/hisi-pcie-pmu.rst.

 - User space tasks can migrate between CPUs, so system-wide sideband is
   still needed even when tracing selected CPUs. Fix this in the setup of
   Intel PT on hybrid systems.

 - Fix the metricgroups title message in 'perf list': it should state that
   the metric groups are to be used with the '-M' option, not '-e'.

 - Sync the msr-index.h copy with the kernel sources, adding support for
   using "AMD64_TSC_RATIO" in filter expressions in 'perf trace' as well
   as decoding it when printing the MSR tracepoint arguments.

 - Fix program header size and alignment when generating a JIT ELF in
   'perf inject'.

 - Add multiple new Intel PT 'perf test' entries, including a jitdump
   one.

 - Fix the 'perf test' entries for 'perf stat' CSV and JSON output when
   running on PowerPC due to an invalid topology number in that arch.

 - Fix the 'perf test' for arm_coresight failures on the ARM Juno
   system.

 - Fix the 'perf test' attr entry for PERF_FORMAT_LOST, adding this
   option to the OR expression expected in the intercepted
   perf_event_open() syscall.

 - Add missing condition flags ('hs', 'lo', 'vc', 'vs') for arm64 in the
   'perf annotate' asm parser.

 - Fix 'perf mem record -C' option processing, it was being chopped up
   when preparing the underlying 'perf record -e mem-events' and thus
   being ignored, requiring using '-- -C CPUs' as a workaround.

 - Improvements and tidy ups for 'perf test' shell infra.

 - Fix Intel PT information printing segfault in uClibc, where a NULL
   format was being passed to fprintf.

* tag 'perf-tools-for-v6.1-2-2022-10-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (23 commits)
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  perf auxtrace arm64: Add support for parsing HiSilicon PCIe Trace packet
  perf auxtrace arm64: Add support for HiSilicon PCIe Tune and Trace device driver
  perf auxtrace arm: Refactor event list iteration in auxtrace_record__init()
  perf tests stat+json_output: Include sanity check for topology
  perf tests stat+csv_output: Include sanity check for topology
  perf intel-pt: Fix system_wide dummy event for hybrid
  perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc
  perf test: Fix attr tests for PERF_FORMAT_LOST
  perf test: test_intel_pt.sh: Add 9 tests
  perf inject: Fix GEN_ELF_TEXT_OFFSET for jit
  perf test: test_intel_pt.sh: Add jitdump test
  perf test: test_intel_pt.sh: Tidy some alignment
  perf test: test_intel_pt.sh: Print a message when skipping kernel tracing
  perf test: test_intel_pt.sh: Tidy some perf record options
  perf test: test_intel_pt.sh: Fix return checking again
  perf: Skip and warn on unknown format 'configN' attrs
  perf list: Fix metricgroups title message
  perf mem: Fix -C option behavior for perf mem record
  perf annotate: Add missing condition flags for arm64
  ...
parents 2df76606 a3a36565
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -155,6 +155,11 @@
						 * Return Stack Buffer Predictions.
						 */

#define ARCH_CAP_XAPIC_DISABLE		BIT(21)	/*
						 * IA32_XAPIC_DISABLE_STATUS MSR
						 * supported
						 */

#define MSR_IA32_FLUSH_CMD		0x0000010b
#define L1D_FLUSH			BIT(0)	/*
						 * Writeback and invalidate the
@@ -585,6 +590,9 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302

/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT			0xc000010e

/* Fam 17h MSRs */
#define MSR_F17H_IRPERF			0xc00000e9

@@ -756,6 +764,8 @@
#define MSR_AMD_DBG_EXTN_CFG		0xc000010f
#define MSR_AMD_SAMP_BR_FROM		0xc0010300

#define DBG_EXTN_CFG_LBRV2EN		BIT_ULL(6)

#define MSR_IA32_MPERF			0x000000e7
#define MSR_IA32_APERF			0x000000e8

@@ -1054,4 +1064,12 @@
#define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
#define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1

/* x2APIC locked status */
#define MSR_IA32_XAPIC_DISABLE_STATUS	0xBD
#define LEGACY_XAPIC_DISABLED		BIT(0) /*
						* x2APIC mode is locked and
						* disabling x2APIC will cause
						* a #GP
						*/

#endif /* _ASM_X86_MSR_INDEX_H */
+2 −3
Original line number Diff line number Diff line
@@ -6,7 +6,6 @@
#include <linux/types.h>
#include <linux/limits.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <sys/types.h> /* pid_t */

#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
@@ -207,7 +206,7 @@ struct perf_record_range_cpu_map {
	__u16 end_cpu;
};

struct __packed perf_record_cpu_map_data {
struct perf_record_cpu_map_data {
	__u16			 type;
	union {
		/* Used when type == PERF_CPU_MAP__CPUS. */
@@ -219,7 +218,7 @@ struct __packed perf_record_cpu_map_data {
		/* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
		struct perf_record_range_cpu_map range_cpu_data;
	};
};
} __attribute__((packed));

#pragma GCC diagnostic pop

+97 −19
Original line number Diff line number Diff line
@@ -4,9 +4,11 @@
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <dirent.h>
#include <stdbool.h>
#include <linux/coresight-pmu.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>

#include "../../../util/auxtrace.h"
#include "../../../util/debug.h"
@@ -14,6 +16,7 @@
#include "../../../util/pmu.h"
#include "cs-etm.h"
#include "arm-spe.h"
#include "hisi-ptt.h"

static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
{
@@ -50,42 +53,114 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
	return arm_spe_pmus;
}

/*
 * Build an array of all PMUs whose sysfs event_source device name contains
 * HISI_PTT_PMU_NAME.
 *
 * @nr_ptts: in/out counter. It is incremented once per matching directory
 *           entry while sizing the array; once the array has been filled it
 *           is set to the number of entries actually stored, so every slot
 *           below *nr_ptts is a valid (non-NULL) PMU.
 * @err:     set to -EINVAL when the device directory cannot be opened and to
 *           -ENOMEM when the array cannot be allocated; otherwise untouched.
 *
 * Return: a zalloc()'ed array that the caller must free(), or NULL when no
 * matching device exists or an error occurred.
 */
static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
{
	const char *sysfs = sysfs__mountpoint();
	struct perf_pmu **hisi_ptt_pmus = NULL;
	struct dirent *dent;
	char path[PATH_MAX];
	DIR *dir = NULL;
	int idx = 0;

	/*
	 * Only format and open the path when a mount point was reported:
	 * handing a NULL pointer to a "%s" conversion is undefined behaviour.
	 * With no mount point, dir stays NULL and the error path below runs.
	 */
	if (sysfs) {
		snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
		dir = opendir(path);
	}
	if (!dir) {
		pr_err("can't read directory '%s'\n", EVENT_SOURCE_DEVICE_PATH);
		*err = -EINVAL;
		return NULL;
	}

	/* First pass: count the candidate devices to size the array. */
	while ((dent = readdir(dir))) {
		if (strstr(dent->d_name, HISI_PTT_PMU_NAME))
			(*nr_ptts)++;
	}

	if (!(*nr_ptts))
		goto out;

	hisi_ptt_pmus = zalloc(sizeof(struct perf_pmu *) * (*nr_ptts));
	if (!hisi_ptt_pmus) {
		pr_err("hisi_ptt alloc failed\n");
		*err = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass: resolve each candidate. The idx bound protects the
	 * array if the directory gained entries between the two passes.
	 */
	rewinddir(dir);
	while ((dent = readdir(dir))) {
		if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < *nr_ptts) {
			hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
			if (hisi_ptt_pmus[idx])
				idx++;
		}
	}

	/*
	 * Lookups may have failed for some entries, leaving trailing NULL
	 * slots. Report only the populated prefix so callers that iterate
	 * over *nr_ptts entries never dereference a NULL PMU.
	 */
	*nr_ptts = idx;

out:
	closedir(dir);
	return hisi_ptt_pmus;
}

/*
 * Look up the PMU whose type matches the event's attr.type.
 *
 * @pmus:   array of PMU pointers to search; may be NULL.
 * @pmu_nr: number of slots in @pmus.
 * @evsel:  event whose core.attr.type is compared against each PMU's type.
 *
 * Return: the first matching PMU, or NULL when @pmus is NULL or no slot
 * matches.
 */
static struct perf_pmu *find_pmu_for_event(struct perf_pmu **pmus,
					   int pmu_nr, struct evsel *evsel)
{
	int i;

	if (!pmus)
		return NULL;

	for (i = 0; i < pmu_nr; i++) {
		/* Tolerate empty (NULL) slots instead of dereferencing them. */
		if (pmus[i] && evsel->core.attr.type == pmus[i]->type)
			return pmus[i];
	}

	return NULL;
}

struct auxtrace_record
*auxtrace_record__init(struct evlist *evlist, int *err)
{
	struct perf_pmu	*cs_etm_pmu;
	struct perf_pmu	*cs_etm_pmu = NULL;
	struct perf_pmu **arm_spe_pmus = NULL;
	struct perf_pmu **hisi_ptt_pmus = NULL;
	struct evsel *evsel;
	bool found_etm = false;
	struct perf_pmu *found_etm = NULL;
	struct perf_pmu *found_spe = NULL;
	struct perf_pmu **arm_spe_pmus = NULL;
	struct perf_pmu *found_ptt = NULL;
	int auxtrace_event_cnt = 0;
	int nr_spes = 0;
	int i = 0;
	int nr_ptts = 0;

	if (!evlist)
		return NULL;

	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
	arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
	hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);

	evlist__for_each_entry(evlist, evsel) {
		if (cs_etm_pmu &&
		    evsel->core.attr.type == cs_etm_pmu->type)
			found_etm = true;
		if (cs_etm_pmu && !found_etm)
			found_etm = find_pmu_for_event(&cs_etm_pmu, 1, evsel);

		if (!nr_spes || found_spe)
			continue;
		if (arm_spe_pmus && !found_spe)
			found_spe = find_pmu_for_event(arm_spe_pmus, nr_spes, evsel);

		for (i = 0; i < nr_spes; i++) {
			if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
				found_spe = arm_spe_pmus[i];
				break;
			}
		}
		if (hisi_ptt_pmus && !found_ptt)
			found_ptt = find_pmu_for_event(hisi_ptt_pmus, nr_ptts, evsel);
	}

	free(arm_spe_pmus);
	free(hisi_ptt_pmus);

	if (found_etm)
		auxtrace_event_cnt++;

	if (found_etm && found_spe) {
		pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
	if (found_spe)
		auxtrace_event_cnt++;

	if (found_ptt)
		auxtrace_event_cnt++;

	if (auxtrace_event_cnt > 1) {
		pr_err("Concurrent AUX trace operation not currently supported\n");
		*err = -EOPNOTSUPP;
		return NULL;
	}
@@ -96,6 +171,9 @@ struct auxtrace_record
#if defined(__aarch64__)
	if (found_spe)
		return arm_spe_recording_init(err, found_spe);

	if (found_ptt)
		return hisi_ptt_recording_init(err, found_ptt);
#endif

	/*
+3 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include <linux/string.h>

#include "arm-spe.h"
#include "hisi-ptt.h"
#include "../../../util/pmu.h"

struct perf_event_attr
@@ -22,6 +23,8 @@ struct perf_event_attr
#if defined(__aarch64__)
	} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
		return arm_spe_pmu_default_config(pmu);
	} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
		pmu->selectable = true;
#endif
	}

+1 −1
Original line number Diff line number Diff line
@@ -102,7 +102,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
	if (err)
		goto out_free_arm;
	/* b, b.cond, br, cbz/cbnz, tbz/tbnz */
	err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl)?n?z?$",
	err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|hs|le|lo|ls|lt|mi|ne|pl|vc|vs)?n?z?$",
		      REG_EXTENDED);
	if (err)
		goto out_free_call;
Loading