Commit 8aad1a56 authored by Mao Minkai's avatar Mao Minkai Committed by guzitao
Browse files

sw64: add perf events support

Sunway inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8YRCP



--------------------------------

Add basic perf events support for SW64.

Signed-off-by: default avatarMao Minkai <maominkai@wxiat.com>
Reviewed-by: default avatarHe Sheng <hesheng@wxiat.com>
Signed-off-by: default avatarGu Zitao <guzitao@wxiat.com>
parent 12cbf3ee
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SW64_PERF_EVENT_H
#define _ASM_SW64_PERF_EVENT_H

#include <asm/pmc.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs)  perf_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif

#endif /* _ASM_SW64_PERF_EVENT_H */
+55 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Definitions for use with the sw64 PMC interface.
 */

#ifndef _ASM_SW64_PMC_H
#define _ASM_SW64_PMC_H

#define PMC_PC0			0
#define PMC_PC1			1

/* Following commands are implemented on all CPUs */
#define PMC_CMD_DISABLE		0
#define PMC_CMD_ENABLE		1
#define PMC_CMD_EVENT_BASE	2
#define PMC_CMD_PM		4
#define PMC_CMD_READ		5
#define PMC_CMD_READ_CLEAR	6
#define PMC_CMD_WRITE_BASE	7

#define PMC_DISABLE_BASE	1

#define PMC_ENABLE_BASE		1

#define PC0_RAW_BASE		0x0
#define PC1_RAW_BASE		0x100
#define PC0_MAX			0xF
#define PC1_MAX			0x3D

#define SW64_PERFCTRL_KM	2
#define SW64_PERFCTRL_UM	3
#define SW64_PERFCTRL_AM	4

/* pc0 events */
#define PC0_INSTRUCTIONS		0x0
#define PC0_BRANCH_INSTRUCTIONS		0x3
#define PC0_CPU_CYCLES			0x8
#define PC0_ITB_READ			0x9
#define PC0_DTB_READ			0xA
#define PC0_ICACHE_READ			0xB
#define PC0_DCACHE_READ			0xC
#define PC0_SCACHE_REFERENCES		0xD

/* pc1 events */
#define PC1_BRANCH_MISSES		0xB
#define PC1_SCACHE_MISSES		0x10
#define PC1_ICACHE_READ_MISSES		0x16
#define PC1_ITB_MISSES			0x17
#define PC1_DTB_SINGLE_MISSES		0x30
#define PC1_DCACHE_MISSES		0x32

#define MAX_HWEVENTS			2
#define PMC_COUNT_MASK			((1UL << 58) - 1)

#endif /* _ASM_SW64_PMC_H */
+41 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */

#ifndef _UAPI_ASM_SW64_PERF_REGS_H
#define _UAPI_ASM_SW64_PERF_REGS_H

enum perf_event_sw64_regs {
	PERF_REG_SW64_R0,
	PERF_REG_SW64_R1,
	PERF_REG_SW64_R2,
	PERF_REG_SW64_R3,
	PERF_REG_SW64_R4,
	PERF_REG_SW64_R5,
	PERF_REG_SW64_R6,
	PERF_REG_SW64_R7,
	PERF_REG_SW64_R8,
	PERF_REG_SW64_R9,
	PERF_REG_SW64_R10,
	PERF_REG_SW64_R11,
	PERF_REG_SW64_R12,
	PERF_REG_SW64_R13,
	PERF_REG_SW64_R14,
	PERF_REG_SW64_R15,
	PERF_REG_SW64_R16,
	PERF_REG_SW64_R17,
	PERF_REG_SW64_R18,
	PERF_REG_SW64_R19,
	PERF_REG_SW64_R20,
	PERF_REG_SW64_R21,
	PERF_REG_SW64_R22,
	PERF_REG_SW64_R23,
	PERF_REG_SW64_R24,
	PERF_REG_SW64_R25,
	PERF_REG_SW64_R26,
	PERF_REG_SW64_R27,
	PERF_REG_SW64_R28,
	PERF_REG_SW64_GP,
	PERF_REG_SW64_SP,
	PERF_REG_SW64_PC,
	PERF_REG_SW64_MAX,
};
#endif /* _UAPI_ASM_SW64_PERF_REGS_H */
+787 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events support for SW64 platforms.
 *
 * This code is based upon riscv and sparc perf event code.
 */

#include <linux/perf_event.h>
#include <asm/stacktrace.h>

/* For tracking PMCs and the hw events they monitor on each CPU. */
struct cpu_hw_events {
	/*
	 * Set the bit (indexed by the counter number) when the counter
	 * is used for an event.
	 */
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	/* Array of events current scheduled on this cpu. */
	struct perf_event	*event[MAX_HWEVENTS];
};

DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

struct sw64_perf_event {
	/* pmu index */
	int counter;
	/* events selector */
	int event;
};

/*
 * A structure to hold the description of the PMCs available on a particular
 * type of SW64 CPU.
 */
struct sw64_pmu_t {
	/* generic hw/cache events table */
	const struct sw64_perf_event *hw_events;
	const struct sw64_perf_event (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
		[PERF_COUNT_HW_CACHE_OP_MAX]
		[PERF_COUNT_HW_CACHE_RESULT_MAX];

	/* method used to map hw/cache events */
	const struct sw64_perf_event *(*map_hw_event)(u64 config);
	const struct sw64_perf_event *(*map_cache_event)(u64 config);

	/* The number of entries in the hw_event_map */
	int  max_events;

	/* The number of counters on this pmu */
	int  num_pmcs;

	/*
	 * All PMC counters reside in the IBOX register PCTR.  This is the
	 * LSB of the counter.
	 */
	int  pmc_count_shift[MAX_HWEVENTS];

	/*
	 * The mask that isolates the PMC bits when the LSB of the counter
	 * is shifted to bit 0.
	 */
	unsigned long pmc_count_mask;

	/* The maximum period the PMC can count. */
	unsigned long pmc_max_period;

	/*
	 * The maximum value that may be written to the counter due to
	 * hardware restrictions is pmc_max_period - pmc_left.
	 */
	long pmc_left;

	/* Subroutine for checking validity of a raw event for this PMU. */
	bool (*raw_event_valid)(u64 config);
};

/*
 * The SW64 PMU description currently in operation.  This is set during
 * the boot process to the specific CPU of the machine.
 */
static const struct sw64_pmu_t *sw64_pmu;

/*
 * SW64 PMC event types
 *
 * There is no one-to-one mapping of the possible hw event types to the
 * actual codes that are used to program the PMCs hence we introduce our
 * own hw event type identifiers.
 */
#define SW64_OP_UNSUP {-1, -1}

/* Mapping of the hw event types to the perf tool interface */
static const struct sw64_perf_event core3_hw_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= {PMC_PC0, PC0_CPU_CYCLES},
	[PERF_COUNT_HW_INSTRUCTIONS]		= {PMC_PC0, PC0_INSTRUCTIONS},
	[PERF_COUNT_HW_CACHE_REFERENCES]	= {PMC_PC0, PC0_SCACHE_REFERENCES},
	[PERF_COUNT_HW_CACHE_MISSES]		= {PMC_PC1, PC1_SCACHE_MISSES},
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= {PMC_PC0, PC0_BRANCH_INSTRUCTIONS},
	[PERF_COUNT_HW_BRANCH_MISSES]		= {PMC_PC1, PC1_BRANCH_MISSES},
};

/* Mapping of the hw cache event types to the perf tool interface */
#define C(x) PERF_COUNT_HW_CACHE_##x
static const struct sw64_perf_event core3_cache_event_map
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_DCACHE_READ},
			[C(RESULT_MISS)]	= {PMC_PC1, PC1_DCACHE_MISSES}
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_ICACHE_READ},
			[C(RESULT_MISS)]	= {PMC_PC1, PC1_ICACHE_READ_MISSES},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_DTB_READ},
			[C(RESULT_MISS)]	= {PMC_PC1, PC1_DTB_SINGLE_MISSES},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= {PMC_PC0, PC0_ITB_READ},
			[C(RESULT_MISS)]	= {PMC_PC1, PC1_ITB_MISSES},
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= SW64_OP_UNSUP,
			[C(RESULT_MISS)]	= SW64_OP_UNSUP,
		},
	},

};

static const struct sw64_perf_event *core3_map_hw_event(u64 config)
{
	return &sw64_pmu->hw_events[config];
}

static const struct sw64_perf_event *core3_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct sw64_perf_event *perf_event;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	perf_event = &((*sw64_pmu->cache_events)[cache_type][cache_op][cache_result]);
	if (perf_event->counter == -1) /* SW64_OP_UNSUP */
		return ERR_PTR(-ENOENT);

	return perf_event;
}

/*
 * r0xx for counter0, r1yy for counter1.
 * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D
 */
static bool core3_raw_event_valid(u64 config)
{
	if ((config >= PC0_RAW_BASE && config <= (PC0_RAW_BASE + PC0_MAX)) ||
		(config >= PC1_RAW_BASE && config <= (PC1_RAW_BASE + PC1_MAX)))
		return true;

	pr_info("sw64 pmu: invalid raw event config %#llx\n", config);
	return false;
}

static const struct sw64_pmu_t core3_pmu = {
	.max_events = ARRAY_SIZE(core3_hw_event_map),
	.hw_events = core3_hw_event_map,
	.map_hw_event = core3_map_hw_event,
	.cache_events = &core3_cache_event_map,
	.map_cache_event = core3_map_cache_event,
	.num_pmcs = MAX_HWEVENTS,
	.pmc_count_mask = PMC_COUNT_MASK,
	.pmc_max_period = PMC_COUNT_MASK,
	.pmc_left = 4,
	.raw_event_valid = core3_raw_event_valid,
};

/*
 * Low-level functions: reading/writing counters
 */
static void sw64_write_pmc(int idx, unsigned long val)
{
	wrperfmon(PMC_CMD_WRITE_BASE + idx, val);
}

static unsigned long sw64_read_pmc(int idx)
{
	return wrperfmon(PMC_CMD_READ, idx);
}

/* Set a new period to sample over */
static int sw64_perf_event_set_period(struct perf_event *event,
				struct hw_perf_event *hwc, int idx)
{
	long left = local64_read(&hwc->period_left);
	long period = hwc->sample_period;
	int overflow = 0;
	unsigned long value;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (left > (long)sw64_pmu->pmc_max_period)
		left = sw64_pmu->pmc_max_period;

	value = sw64_pmu->pmc_max_period - left;
	local64_set(&hwc->prev_count, value);
	sw64_write_pmc(idx, value);

	perf_event_update_userpage(event);

	return overflow;
}

/*
 * Calculates the count (the 'delta') since the last time the PMC was read.
 *
 * As the PMCs' full period can easily be exceeded within the perf system
 * sampling period we cannot use any high order bits as a guard bit in the
 * PMCs to detect overflow as is done by other architectures.  The code here
 * calculates the delta on the basis that there is no overflow when ovf is
 * zero.  The value passed via ovf by the interrupt handler corrects for
 * overflow.
 *
 * This can be racey on rare occasions -- a call to this routine can occur
 * with an overflowed counter just before the PMI service routine is called.
 * The check for delta negative hopefully always rectifies this situation.
 */
static unsigned long sw64_perf_event_update(struct perf_event *event,
					struct hw_perf_event *hwc, int idx, long ovf)
{
	long prev_raw_count, new_raw_count;
	long delta;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = sw64_read_pmc(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count - (prev_raw_count & sw64_pmu->pmc_count_mask)) + ovf;

	/* It is possible on very rare occasions that the PMC has overflowed
	 * but the interrupt is yet to come.  Detect and fix this situation.
	 */
	if (unlikely(delta < 0))
		delta += sw64_pmu->pmc_max_period + 1;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

/*
 * State transition functions:
 *
 * add()/del() & start()/stop()
 *
 */

/*
 * pmu->start: start the event.
 */
static void sw64_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
		sw64_perf_event_set_period(event, hwc, hwc->idx);
	}

	hwc->state = 0;

	/* counting in selected modes, for both counters */
	wrperfmon(PMC_CMD_PM, hwc->config_base);
	wrperfmon(PMC_CMD_EVENT_BASE + hwc->idx, hwc->event_base);
	wrperfmon(PMC_CMD_ENABLE, PMC_ENABLE_BASE + hwc->idx);
}

/*
 * pmu->stop: stop the counter
 */
static void sw64_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		wrperfmon(PMC_CMD_DISABLE, PMC_DISABLE_BASE + hwc->idx);
		hwc->state |= PERF_HES_STOPPED;
		barrier();
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		sw64_perf_event_update(event, hwc, hwc->idx, 0);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

/*
 * pmu->add: add the event to PMU.
 */
static int sw64_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int err = 0;
	unsigned long irq_flags;

	local_irq_save(irq_flags);

	if (__test_and_set_bit(hwc->idx, cpuc->used_mask)) {
		err = -ENOSPC;
		goto out;
	}

	cpuc->event[hwc->idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		sw64_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	local_irq_restore(irq_flags);

	return err;
}

/*
 * pmu->del: delete the event from PMU.
 */
static void sw64_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	unsigned long irq_flags;

	local_irq_save(irq_flags);

	sw64_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->event[hwc->idx] = NULL;
	__clear_bit(event->hw.idx, cpuc->used_mask);

	/* Absorb the final count and turn off the event. */
	perf_event_update_userpage(event);

	local_irq_restore(irq_flags);
}

/*
 * pmu->read: read and update the counter
 */
static void sw64_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	sw64_perf_event_update(event, hwc, hwc->idx, 0);
}

static bool supported_cpu(void)
{
	return true;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	/* Nothing to be done! */
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	const struct sw64_perf_event *event_type;


	/*
	 * SW64 does not have per-counter usr/os/guest/host bits,
	 * we can distinguish exclude_user and exclude_kernel by
	 * sample mode.
	 */
	if (event->attr.exclude_hv || event->attr.exclude_idle ||
			event->attr.exclude_host || event->attr.exclude_guest)
		return -EINVAL;

	/*
	 * SW64 does not support precise ip feature, and system hang when
	 * detecting precise_ip by perf_event_attr__set_max_precise_ip
	 * in userspace
	 */
	if (attr->precise_ip != 0)
		return -EOPNOTSUPP;

	/* SW64 has fixed counter for given event type */
	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sw64_pmu->max_events)
			return -EINVAL;
		event_type = sw64_pmu->map_hw_event(attr->config);
		hwc->idx = event_type->counter;
		hwc->event_base = event_type->event;
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		event_type = sw64_pmu->map_cache_event(attr->config);
		if (IS_ERR(event_type))	/* */
			return PTR_ERR(event_type);
		hwc->idx = event_type->counter;
		hwc->event_base = event_type->event;
	} else { /* PERF_TYPE_RAW */
		if (!sw64_pmu->raw_event_valid(attr->config))
			return -EINVAL;
		hwc->idx = attr->config >> 8;	/* counter selector */
		hwc->event_base = attr->config & 0xff;	/* event selector */
	}

	hwc->config_base = SW64_PERFCTRL_AM;

	if (attr->exclude_user)
		hwc->config_base = SW64_PERFCTRL_KM;
	if (attr->exclude_kernel)
		hwc->config_base = SW64_PERFCTRL_UM;

	hwc->config = attr->config;

	if (!is_sampling_event(event))
		pr_debug("not sampling event\n");

	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = sw64_pmu->pmc_max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

/*
 * Main entry point to initialise a HW performance event.
 */
static int sw64_pmu_event_init(struct perf_event *event)
{
	int err;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;
	default:
		return -ENOENT;
	}

	if (!sw64_pmu)
		return -ENODEV;

	/* Do the real initialisation work. */
	err = __hw_perf_event_init(event);

	return err;
}

static struct pmu pmu = {
	.name		= "core3-base",
	.capabilities   = PERF_PMU_CAP_NO_NMI,
	.event_init	= sw64_pmu_event_init,
	.add		= sw64_pmu_add,
	.del		= sw64_pmu_del,
	.start		= sw64_pmu_start,
	.stop		= sw64_pmu_stop,
	.read		= sw64_pmu_read,
};

void perf_event_print_debug(void)
{
	unsigned long flags;
	unsigned long pcr0, pcr1;
	int cpu;

	if (!supported_cpu())
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr0 = wrperfmon(PMC_CMD_READ, PMC_PC0);
	pcr1 = wrperfmon(PMC_CMD_READ, PMC_PC1);

	pr_info("CPU#%d: PCTR0[%lx] PCTR1[%lx]\n", cpu, pcr0, pcr1);

	local_irq_restore(flags);
}

static void sw64_perf_event_irq_handler(unsigned long idx,
					struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc;
	struct perf_sample_data data;
	struct perf_event *event;
	struct hw_perf_event *hwc;

	__this_cpu_inc(irq_pmi_count);
	cpuc = this_cpu_ptr(&cpu_hw_events);

	event = cpuc->event[idx];

	if (unlikely(!event)) {
		irq_err_count++;
		return;
	}

	hwc = &event->hw;
	sw64_perf_event_update(event, hwc, idx, sw64_pmu->pmc_max_period + 1);
	perf_sample_data_init(&data, 0, hwc->last_period);

	if (sw64_perf_event_set_period(event, hwc, idx)) {
		if (perf_event_overflow(event, &data, regs)) {
			/* Interrupts coming too quickly; "throttle" the
			 * counter, i.e., disable it for a little while.
			 */
			sw64_pmu_stop(event, 0);
		}
	}
}

bool valid_utext_addr(unsigned long addr)
{
	return addr >= current->mm->start_code && addr <= current->mm->end_code;
}

bool valid_dy_addr(unsigned long addr)
{
	bool ret = false;
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;

	if (addr > TASK_SIZE || addr < TASK_UNMAPPED_BASE)
		return ret;
	vma = find_vma(mm, addr);
	if (vma && vma->vm_start <= addr && (vma->vm_flags & VM_EXEC))
		ret = true;
	return ret;
}

#ifdef CONFIG_FRAME_POINTER
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
		struct pt_regs *regs)
{

	struct stack_frame frame;
	unsigned long __user *fp;
	int err;

	perf_callchain_store(entry, regs->pc);

	fp = (unsigned long __user *)regs->regs[15];

	while (entry->nr < entry->max_stack && (unsigned long)fp < current->mm->start_stack) {
		if (!access_ok(fp, sizeof(frame)))
			break;

		pagefault_disable();
		err =  __copy_from_user_inatomic(&frame, fp, sizeof(frame));
		pagefault_enable();

		if (err)
			break;

		if (valid_utext_addr(frame.return_address) || valid_dy_addr(frame.return_address))
			perf_callchain_store(entry, frame.return_address);
		fp = (void __user *)frame.next_frame;
	}
}
#else /* !CONFIG_FRAME_POINTER */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
		struct pt_regs *regs)
{
	unsigned long usp = rdusp();
	unsigned long user_addr;
	int err;

	perf_callchain_store(entry, regs->pc);

	while (entry->nr < entry->max_stack && usp < current->mm->start_stack) {
		if (!access_ok((const void __user *)usp, 8))
			break;

		pagefault_disable();
		err = __get_user(user_addr, (unsigned long *)usp);
		pagefault_enable();

		if (err)
			break;

		if (valid_utext_addr(user_addr) || valid_dy_addr(user_addr))
			perf_callchain_store(entry, user_addr);
		usp = usp + 8;
	}
}
#endif/* CONFIG_FRAME_POINTER */

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whist unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int callchain_trace(unsigned long pc, void *data)
{
	struct perf_callchain_entry_ctx *entry = data;

	perf_callchain_store(entry, pc);
	return 0;
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	walk_stackframe(NULL, regs, callchain_trace, entry);
}

/*
 * Gets the perf_instruction_pointer and perf_misc_flags for guest os.
 */

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	if (perf_guest_state())
		return perf_guest_get_ip();

	return instruction_pointer(regs);
}

unsigned long perf_misc_flags(struct pt_regs *regs)
{
	unsigned int guest_state = perf_guest_state();
	int misc = 0;

	if (guest_state) {
		if (guest_state & PERF_GUEST_USER)
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	return misc;
}

/*
 * Init call to initialise performance events at kernel startup.
 */
int __init init_hw_perf_events(void)
{
	if (!supported_cpu()) {
		pr_info("Performance events: Unsupported CPU type!\n");
		return 0;
	}

	pr_info("Performance events: Supported CPU type!\n");

	/* Override performance counter IRQ vector */

	perf_irq = sw64_perf_event_irq_handler;

	/* And set up PMU specification */
	sw64_pmu = &core3_pmu;

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

	return 0;
}
early_initcall(init_hw_perf_events);
+33 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <linux/errno.h>
#include <linux/perf_event.h>

u64 perf_reg_value(struct pt_regs *regs, int idx)
{
	if (WARN_ON_ONCE((u32)idx >= PERF_REG_SW64_MAX))
		return 0;

	return ((unsigned long *)regs)[idx];
}

#define REG_RESERVED (~((1ULL << PERF_REG_SW64_MAX) - 1))

int perf_reg_validate(u64 mask)
{
	if (!mask || mask & REG_RESERVED)
		return -EINVAL;
	return 0;
}

u64 perf_reg_abi(struct task_struct *task)
{
	return PERF_SAMPLE_REGS_ABI_64;
}

void perf_get_regs_user(struct perf_regs *regs_user,
			struct pt_regs *regs)
{
	regs_user->regs = task_pt_regs(current);
	regs_user->abi = perf_reg_abi(current);
}