Commit cddc555c authored by Anshuman Khandual, committed by Junhao He
Browse files

drivers: perf: arm_pmu: Add infrastructure for branch stack sampling

maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8EC9K
CVE: NA

Reference: https://lore.kernel.org/linux-arm-kernel/20240613061731.3109448-1-anshuman.khandual@arm.com/



--------------------------------

In order to support the Branch Record Buffer Extension (BRBE), we need to
extend the arm_pmu framework with some basic infrastructure for branch
stack sampling which arm_pmu drivers can opt-in to using. Subsequent
patches will use this to add support for BRBE in the PMUv3 driver.

With BRBE, the hardware records branches into a hardware FIFO, which will
be sampled by software when perf events overflow. A task may be context-
switched an arbitrary number of times between overflows, and to avoid
losing samples we need to save the current records when a task is context-
switched out. To do this we'll need to use the pmu::sched_task() callback,
and we'll also need to allocate some per-task storage space via event flag
PERF_ATTACH_TASK_DATA.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Junhao He <hejunhao3@huawei.com>
parent 4874a657
Loading
Loading
Loading
Loading
+39 −3
Original line number Diff line number Diff line
@@ -289,6 +289,23 @@ static void armpmu_start(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct pmu_hw_events *cpuc = this_cpu_ptr(armpmu->hw_events);
	int idx;

	/*
	 * Merge all branch filter requests from different perf
	 * events being added into this PMU. This includes both
	 * privilege and branch type filters.
	 */
	if (armpmu->has_branch_stack) {
		cpuc->branch_sample_type = 0;
		for (idx = 0; idx < ARMPMU_MAX_HWEVENTS; idx++) {
			struct perf_event *event_idx = cpuc->events[idx];

			if (event_idx && has_branch_stack(event_idx))
				cpuc->branch_sample_type |= event_idx->attr.branch_sample_type;
		}
	}

	/*
	 * ARM pmu always has to reprogram the period, so ignore
@@ -317,6 +334,9 @@ armpmu_del(struct perf_event *event, int flags)
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (has_branch_stack(event))
		armpmu->branch_stack_del(event, hw_events);

	armpmu_stop(event, PERF_EF_UPDATE);
	hw_events->events[idx] = NULL;
	armpmu->clear_event_idx(hw_events, event);
@@ -342,6 +362,9 @@ armpmu_add(struct perf_event *event, int flags)
	if (idx < 0)
		return idx;

	if (has_branch_stack(event))
		armpmu->branch_stack_add(event, hw_events);

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
@@ -511,13 +534,25 @@ static int armpmu_event_init(struct perf_event *event)
		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
		return -ENOENT;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
	if (has_branch_stack(event)) {
		if (!armpmu->has_branch_stack)
			return -EOPNOTSUPP;

		if (!armpmu->branch_stack_init(event))
			return -EOPNOTSUPP;
	}

	return __hw_perf_event_init(event);
}

/*
 * sched_task hook installed in the generic struct pmu.
 *
 * Resolves the arm_pmu that owns @pmu_ctx and relays the notification to
 * that driver's sched_task callback. Drivers that did not provide a
 * callback are left untouched.
 *
 * @pmu_ctx:  perf PMU context passed through unchanged to the driver.
 * @sched_in: passed through unchanged to the driver.
 */
static void armpmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct arm_pmu *pmu = to_arm_pmu(pmu_ctx->pmu);

	/* The callback is optional; nothing to do when it is absent. */
	if (!pmu->sched_task)
		return;

	pmu->sched_task(pmu_ctx, sched_in);
}

static void armpmu_enable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
@@ -881,6 +916,7 @@ struct arm_pmu *armpmu_alloc(void)
	}

	pmu->pmu = (struct pmu) {
		.sched_task	= armpmu_sched_task,
		.pmu_enable	= armpmu_enable,
		.pmu_disable	= armpmu_disable,
		.event_init	= armpmu_event_init,
+31 −1
Original line number Diff line number Diff line
@@ -46,6 +46,18 @@ static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT);
	},								\
}

/*
 * Maximum branch record entries which could be processed
 * for core perf branch stack sampling support, regardless
 * of the hardware support available on a given ARM PMU.
 */
#define MAX_BRANCH_RECORDS 64

/*
 * A perf branch stack header paired with a fixed-size array of
 * MAX_BRANCH_RECORDS branch entries.
 *
 * NOTE(review): branch_entries is presumably the backing storage for the
 * entries reported through branch_stack, in which case the two members
 * must stay adjacent and in this order - confirm against the definition
 * of struct perf_branch_stack.
 */
struct branch_records {
	struct perf_branch_stack	branch_stack;
	struct perf_branch_entry	branch_entries[MAX_BRANCH_RECORDS];
};

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
@@ -72,6 +84,17 @@ struct pmu_hw_events {
	struct arm_pmu		*percpu_pmu;

	int irq;

	struct branch_records	*branches;

	/* Active context for task events */
	void			*branch_context;

	/* Active events requesting branch records */
	unsigned int		branch_users;

	/* Active branch sample type filters */
	unsigned long		branch_sample_type;
};

enum armpmu_attr_groups {
@@ -102,8 +125,15 @@ struct arm_pmu {
	void		(*stop)(struct arm_pmu *);
	void		(*reset)(void *);
	int		(*map_event)(struct perf_event *event);
	void		(*sched_task)(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
	bool		(*branch_stack_init)(struct perf_event *event);
	void		(*branch_stack_add)(struct perf_event *event, struct pmu_hw_events *cpuc);
	void		(*branch_stack_del)(struct perf_event *event, struct pmu_hw_events *cpuc);
	void		(*branch_stack_reset)(void);
	int		num_events;
	bool		secure_access; /* 32-bit ARM only */
	unsigned int	secure_access	: 1, /* 32-bit ARM only */
			has_branch_stack: 1, /* 64-bit ARM only */
			reserved	: 30;
#define ARMV8_PMUV3_MAX_COMMON_EVENTS		0x40
	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE	0x4000