Unverified Commit 3ba6d71a authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!2794 arm64/perf: Enable branch stack sampling

Merge Pull Request from: @ci-robot 
 
PR sync from: Junhao He <hejunhao3@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I65SDEDWRRON44EIG7WVIT44STWZAWGY/ 
This series enables perf branch stack sampling support on the arm64
platform via a new architecture feature called the Branch Record Buffer
Extension (BRBE). All relevant register definitions can be found in the
Arm architecture reference material for BRBE (the original cover-letter
link was lost in this page capture).

v2->v3:
- Remove indentation before macro #ifdef

v1->v2:
- Fix kabi breakage in struct perf_branch_entry

Anshuman Khandual (16):
  perf tools: Add missing branch_sample_type to
    perf_event_attr__fprintf()
  perf: Add irq and exception return branch types
  perf: Add system error and not in transaction branch types
  perf: Extend branch type classification
  perf: Capture branch privilege information
  perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform
  perf branch: Add system error and not in transaction branch types
  perf branch: Extend branch type classification
  perf branch: Add branch privilege information request flag
  perf branch: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform
  perf: Consolidate branch sample filter helpers
  perf record: Add remaining branch filters: "no_cycles", "no_flags" &
    "hw_index"
  arm64/sysreg: Add BRBE registers and fields
  drivers: perf: arm_pmu: Add new sched_task() callback
  drivers: perf: arm_pmuv3: Enable branch stack sampling framework
  drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE

James Clark (5):
  perf evsel: Add error message for unsupported branch stack cases
  perf session: Print branch stack entry type in --dump-raw-trace
  perf script: Refactor branch stack printing
  perf script: Output branch sample type
  perf branch: Fix interpretation of branch records

Junhao He (1):
  perf: Fix kabi breakage in struct perf_branch_entry

Mark Brown (1):
  arm64/sysreg: Introduce helpers for access to sysreg fields

Sandipan Das (1):
  perf/core: Add speculation info to branch entries


-- 
2.33.0
 
https://gitee.com/src-openeuler/kernel/issues/I8DFTV
https://gitee.com/openeuler/kernel/issues/I8EC9K 
 
Link: https://gitee.com/openeuler/kernel/pulls/2794

 

Reviewed-by: default avatarXu Kuohai <xukuohai@huawei.com>
Signed-off-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
parents 3ac6fd8f 39619c9c
Loading
Loading
Loading
Loading
+56 −0
Original line number Diff line number Diff line
@@ -239,12 +239,68 @@
/* PMMIR_EL1.SLOTS mask */
#define ARMV8_PMU_SLOTS_MASK	0xff

struct pmu_hw_events;
struct arm_pmu;
struct perf_event;

#ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs)	perf_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs

/*
 * Branch Record Buffer Extension (BRBE) hooks called by the armv8 PMU
 * driver for branch stack sampling. When CONFIG_ARM64_BRBE is disabled
 * they collapse into the no-op stubs below, so call sites need no
 * #ifdef guards of their own.
 */
#ifdef CONFIG_ARM64_BRBE
/* Discard any branch records currently buffered by the hardware. */
void armv8pmu_branch_reset(void);
/* Probe this PMU for BRBE support (presumably records capabilities
 * on @arm_pmu — see the BRBE driver for details). */
void armv8pmu_branch_probe(struct arm_pmu *arm_pmu);
/* Check whether @event's requested branch sample attributes are
 * supported; false rejects the event. */
bool armv8pmu_branch_attr_valid(struct perf_event *event);
/* Start/stop branch record capture for @event. */
void armv8pmu_branch_enable(struct perf_event *event);
void armv8pmu_branch_disable(struct perf_event *event);
/* Drain hardware branch records into @cpuc on behalf of @event. */
void armv8pmu_branch_read(struct pmu_hw_events *cpuc,
			  struct perf_event *event);
/* Save live branch records into the per-task cache @ctx on sched out. */
void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx);
/* Allocate/free the per-task branch record context cache. */
int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu);
void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu);
#else  /* !CONFIG_ARM64_BRBE */
/*
 * !BRBE stubs: resets/saves/reads are no-ops, attribute validation
 * always fails (so branch-stack events are rejected), and the task
 * context cache allocation trivially succeeds with nothing to free.
 */
static inline void armv8pmu_branch_reset(void)
{
}

static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu)
{
}

static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
{
	return false;
}

static inline void armv8pmu_branch_enable(struct perf_event *event)
{
}

static inline void armv8pmu_branch_disable(struct perf_event *event)
{
}

static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc,
					struct perf_event *event)
{
}

static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx)
{
}

static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu)
{
	return 0;
}

static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu)
{
}
#endif /* CONFIG_ARM64_BRBE */
#endif

#define perf_arch_fetch_caller_regs(regs, __ip) { \
+511 −0

File changed.

Preview size limit exceeded, changes collapsed.

+48 −0
Original line number Diff line number Diff line
@@ -486,6 +486,51 @@ EXPORT_SYMBOL(kimage_vaddr)
 */
	.section ".idmap.text","awx"

#ifdef CONFIG_ARM64_BRBE
/*
 * Enable BRBE cycle counting
 *
 * BRBE requires both the BRBCR_EL1.CC and BRBCR_EL2.CC fields to be
 * set for cycle counts to be available in BRBINF<N>_EL1.CC during
 * branch record processing after a PMU interrupt. This enables the CC
 * field on both registers while still executing inside EL2.
 *
 * The BRBE driver can still toggle branch record cycle count support
 * via the BRBCR_EL1.CC field regardless of whether the kernel ends up
 * executing in EL1 or EL2.
 *
 * Clobbers: x0, x1.
 */
.macro __init_el2_brbe
	mrs	x1, id_aa64dfr0_el1
	ubfx	x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
	cbz	x1, .Lskip_brbe_cc_\@

	mrs_s	x0, SYS_BRBCR_EL2
	orr	x0, x0, BRBCR_ELx_CC
	msr_s	SYS_BRBCR_EL2, x0

	/*
	 * Accessing BRBCR_EL1 register here does not require
	 * BRBCR_EL12 addressing mode as HCR_EL2.E2H is still
	 * clear. Regardless, check for HCR_E2H and be on the
	 * safer side.
	 */
	mrs	x1, hcr_el2
	and	x1, x1, #HCR_E2H
	cbz	x1, .Lset_brbe_el1_direct_\@

	mrs_s	x0, SYS_BRBCR_EL12
	orr	x0, x0, BRBCR_ELx_CC
	msr_s	SYS_BRBCR_EL12, x0
	b	.Lskip_brbe_cc_\@

.Lset_brbe_el1_direct_\@:
	mrs_s	x0, SYS_BRBCR_EL1
	orr	x0, x0, BRBCR_ELx_CC
	msr_s	SYS_BRBCR_EL1, x0
.Lskip_brbe_cc_\@:
.endm

#endif
/*
 * If we're fortunate enough to boot at EL2, ensure that the world is
 * sane before dropping to EL1.
@@ -601,6 +646,9 @@ set_hcr:
7:
	msr	mdcr_el2, x3			// Configure debug traps

#ifdef CONFIG_ARM64_BRBE
	__init_el2_brbe
#endif
	/* LORegions */
	mrs	x1, id_aa64mmfr1_el1
	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+89 −1
Original line number Diff line number Diff line
@@ -715,10 +715,16 @@ static void armv8pmu_enable_event(struct perf_event *event)
	 * Enable counter
	 */
	armv8pmu_enable_event_counter(event);

	if (has_branch_stack(event))
		armv8pmu_branch_enable(event);
}

static void armv8pmu_disable_event(struct perf_event *event)
{
	if (has_branch_stack(event))
		armv8pmu_branch_disable(event);

	/*
	 * Disable counter
	 */
@@ -792,6 +798,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
		if (!armpmu_event_set_period(event))
			continue;

		/*
		 * PMU IRQ should remain asserted until all branch records
		 * are captured and processed into struct perf_sample_data.
		 */
		if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
			armv8pmu_branch_read(cpuc, event);
			data.br_stack = &cpuc->branches->branch_stack;
			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
		}

		/*
		 * Perf event overflow will queue the processing of the event as
		 * an irq_work which will be taken care of in the handling of
@@ -871,6 +887,24 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
		clear_bit(idx - 1, cpuc->used_mask);
}

/*
 * Per-task scheduling hook for branch stack sampling.
 *
 * On sched out, live branch records belonging to the outgoing task are
 * saved into its per-task context cache; on sched in, stale hardware
 * records (captured for some other task) are discarded via a reset so
 * the incoming task starts with a clean branch record buffer.
 *
 * NOTE(review): the original computed "ctx ? ctx->task_ctx_data : NULL"
 * AFTER unconditionally dereferencing ctx->pmu, making the NULL check
 * dead and misleading; it has been dropped. @ctx must be non-NULL here
 * either way.
 */
static void armv8pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
	void *task_ctx = ctx->task_ctx_data;

	if (!armpmu->has_branch_stack)
		return;

	if (!sched_in) {
		/* Save branch records in task_ctx on sched out */
		if (task_ctx)
			armv8pmu_branch_save(armpmu, task_ctx);
		return;
	}

	/* Reset branch records on sched in */
	armv8pmu_branch_reset();
}

/*
 * Add an event filter to a given event.
 */
@@ -947,6 +981,9 @@ static void armv8pmu_reset(void *info)
		pmcr |= ARMV8_PMU_PMCR_LP;

	armv8pmu_pmcr_write(pmcr);

	if (cpu_pmu->has_branch_stack)
		armv8pmu_branch_reset();
}

static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -964,6 +1001,12 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
				       &armv8_pmuv3_perf_cache_map,
				       ARMV8_PMU_EVTYPE_EVENT);

	if (has_branch_stack(event)) {
		event->attach_state |= PERF_ATTACH_TASK_DATA;
		if (!armv8pmu_branch_attr_valid(event))
			return -EOPNOTSUPP;
	}

	if (armv8pmu_event_is_64bit(event))
		event->hw.flags |= ARMPMU_EVT_64BIT;

@@ -1056,6 +1099,35 @@ static void __armv8pmu_probe_pmu(void *info)
		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
	else
		cpu_pmu->reg_pmmir = 0;
	armv8pmu_branch_probe(cpu_pmu);
}

/*
 * Allocate percpu branch record buffers and wire each one into the
 * matching per-cpu pmu_hw_events slot.
 *
 * Returns 0 on success, -ENOMEM if the percpu allocation fails.
 */
static int branch_records_alloc(struct arm_pmu *armpmu)
{
	struct branch_records __percpu *records;
	int cpu;

	records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
	if (!records)
		return -ENOMEM;

	/*
	 * Ownership of the percpu allocation is handed over to the
	 * per-cpu pmu_hw_events entries below and the memory is never
	 * released afterwards, so permanently dropping the 'records'
	 * anchor is acceptable. It would only need to be kept if a
	 * later free_percpu() were ever required.
	 */
	for_each_possible_cpu(cpu) {
		struct pmu_hw_events *hw_events = per_cpu_ptr(armpmu->hw_events, cpu);

		hw_events->branches = per_cpu_ptr(records, cpu);
	}

	return 0;
}

static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1072,7 +1144,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
	if (ret)
		return ret;

	return probe.present ? 0 : -ENODEV;
	if (!probe.present)
		return -ENODEV;

	if (cpu_pmu->has_branch_stack) {
		ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu);
		if (ret)
			return ret;

		ret = branch_records_alloc(cpu_pmu);
		if (ret) {
			armv8pmu_task_ctx_cache_free(cpu_pmu);
			return ret;
		}
	}
	return 0;
}

static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
@@ -1097,6 +1183,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
	cpu_pmu->reset			= armv8pmu_reset;
	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
	cpu_pmu->filter_match		= armv8pmu_filter_match;
	cpu_pmu->sched_task		= armv8pmu_sched_task;
	cpu_pmu->branch_reset		= armv8pmu_branch_reset;

	cpu_pmu->name			= name;
	cpu_pmu->map_event		= map_event;
+2 −2
Original line number Diff line number Diff line
@@ -1350,10 +1350,10 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
	PERF_BR_ERET,		/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
	PERF_BR_IRQ,		/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
Loading