Commit 486a7f46 authored by Shaokun Zhang's avatar Shaokun Zhang Committed by Will Deacon
Browse files

drivers/perf: hisi: Add new functions for L3C PMU



On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU:

* tt_req: it is the abbreviation of tracetag request and allows user to
count only read/write/atomic operations. tt_req is 3-bit and details are
listed in the hisi-pmu document.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5

* tt_core: it is the abbreviation of tracetag core and allows user to
filter by core/thread within the cluster, it is a 8-bit bitmap that each
bit represents the corresponding core/thread in this L3C.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5

* datasrc_cfg: it is the abbreviation of data source configuration and
allows user to check where the data comes from, such as: from local DDR,
cross-die DDR or cross-socket DDR. Its is 5-bit and represents different
data source in the SoC.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5

* datasrc_skt: it is the abbreviation of data source from another socket
and is used in the multi-chips, if user wants to check the cross-socket
datat source, it shall be added in perf command. Only one bit is used to
control this.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: default avatarJohn Garry <john.garry@huawei.com>
Co-developed-by: default avatarQi Liu <liuqi115@huawei.com>
Signed-off-by: default avatarQi Liu <liuqi115@huawei.com>
Signed-off-by: default avatarShaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1615186237-22263-5-git-send-email-zhangshaokun@hisilicon.com


Signed-off-by: default avatarWill Deacon <will@kernel.org>
parent 3da582df
Loading
Loading
Loading
Loading
+240 −19
Original line number Diff line number Diff line
@@ -23,12 +23,17 @@
#define L3C_INT_MASK		0x0800
#define L3C_INT_STATUS		0x0808
#define L3C_INT_CLEAR		0x080c
#define L3C_CORE_CTRL           0x1b04
#define L3C_TRACETAG_CTRL       0x1b20
#define L3C_DATSRC_TYPE         0x1b48
#define L3C_DATSRC_CTRL         0x1bf0
#define L3C_EVENT_CTRL	        0x1c00
#define L3C_VERSION		0x1cf0
#define L3C_EVENT_TYPE0		0x1d00
/*
 * Each counter is 48-bits and [48:63] are reserved
 * which are Read-As-Zero and Writes-Ignored.
 * If the HW version only supports a 48-bit counter, then
 * bits [63:48] are reserved, which are Read-As-Zero and
 * Writes-Ignored.
 */
#define L3C_CNTR0_LOWER		0x1e00

@@ -36,8 +41,186 @@
#define L3C_NR_COUNTERS		0x8

#define L3C_PERF_CTRL_EN	0x10000
#define L3C_TRACETAG_EN		BIT(31)
#define L3C_TRACETAG_REQ_SHIFT	7
#define L3C_TRACETAG_MARK_EN	BIT(0)
#define L3C_TRACETAG_REQ_EN	(L3C_TRACETAG_MARK_EN | BIT(2))
#define L3C_TRACETAG_CORE_EN	(L3C_TRACETAG_MARK_EN | BIT(3))
#define L3C_CORE_EN		BIT(20)
#define L3C_COER_NONE		0x0
#define L3C_DATSRC_MASK		0xFF
#define L3C_DATSRC_SKT_EN	BIT(23)
#define L3C_DATSRC_NONE		0x0
#define L3C_EVTYPE_NONE		0xff
#define L3C_V1_NR_EVENTS	0x59
#define L3C_V2_NR_EVENTS	0xFF

HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);

static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 tt_req = hisi_get_tt_req(event);

	if (tt_req) {
		u32 val;

		/* Set request-type for tracetag */
		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
		val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
		val |= L3C_TRACETAG_REQ_EN;
		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);

		/* Enable request-tracetag statistics */
		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
		val |= L3C_TRACETAG_EN;
		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
	}
}

static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 tt_req = hisi_get_tt_req(event);

	if (tt_req) {
		u32 val;

		/* Clear request-type */
		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
		val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
		val &= ~L3C_TRACETAG_REQ_EN;
		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);

		/* Disable request-tracetag statistics */
		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
		val &= ~L3C_TRACETAG_EN;
		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
	}
}

static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u32 reg, reg_idx, shift, val;
	int idx = hwc->idx;

	/*
	 * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1).
	 * There are 2 datasource ctrl register for the 8 hardware counters.
	 * Datasrc is 8-bits and for the former 4 hardware counters,
	 * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters,
	 * L3C_DATSRC_TYPE1 is chosen.
	 */
	reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
	reg_idx = idx % 4;
	shift = 8 * reg_idx;

	val = readl(l3c_pmu->base + reg);
	val &= ~(L3C_DATSRC_MASK << shift);
	val |= ds_cfg << shift;
	writel(val, l3c_pmu->base + reg);
}

static void hisi_l3c_pmu_config_ds(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 ds_cfg = hisi_get_datasrc_cfg(event);
	u32 ds_skt = hisi_get_datasrc_skt(event);

	if (ds_cfg)
		hisi_l3c_pmu_write_ds(event, ds_cfg);

	if (ds_skt) {
		u32 val;

		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
		val |= L3C_DATSRC_SKT_EN;
		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
	}
}

static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 ds_cfg = hisi_get_datasrc_cfg(event);
	u32 ds_skt = hisi_get_datasrc_skt(event);

	if (ds_cfg)
		hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);

	if (ds_skt) {
		u32 val;

		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
		val &= ~L3C_DATSRC_SKT_EN;
		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
	}
}

static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 core = hisi_get_tt_core(event);

	if (core) {
		u32 val;

		/* Config and enable core information */
		writel(core, l3c_pmu->base + L3C_CORE_CTRL);
		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
		val |= L3C_CORE_EN;
		writel(val, l3c_pmu->base + L3C_PERF_CTRL);

		/* Enable core-tracetag statistics */
		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
		val |= L3C_TRACETAG_CORE_EN;
		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
	}
}

static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 core = hisi_get_tt_core(event);

	if (core) {
		u32 val;

		/* Clear core information */
		writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
		val &= ~L3C_CORE_EN;
		writel(val, l3c_pmu->base + L3C_PERF_CTRL);

		/* Disable core-tracetag statistics */
		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
		val &= ~L3C_TRACETAG_CORE_EN;
		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
	}
}

static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
{
	if (event->attr.config1 != 0x0) {
		hisi_l3c_pmu_config_req_tracetag(event);
		hisi_l3c_pmu_config_core_tracetag(event);
		hisi_l3c_pmu_config_ds(event);
	}
}

static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
{
	if (event->attr.config1 != 0x0) {
		hisi_l3c_pmu_clear_ds(event);
		hisi_l3c_pmu_clear_core_tracetag(event);
		hisi_l3c_pmu_clear_req_tracetag(event);
	}
}

/*
 * Select the counter register offset using the counter index
@@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
				     struct hw_perf_event *hwc)
{
	/* Read 64-bits and the upper 16 bits are RAZ */
	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
}

static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
				       struct hw_perf_event *hwc, u64 val)
{
	/* Write 64-bits and the upper 16 bits are WI */
	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
}

@@ -166,23 +347,14 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)

static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
	{ "HISI0213", },
	{},
	{ "HISI0214", },
	{}
};
MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);

static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
				  struct hisi_pmu *l3c_pmu)
{
	unsigned long long id;
	acpi_status status;

	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
				       "_UID", NULL, &id);
	if (ACPI_FAILURE(status))
		return -EINVAL;

	l3c_pmu->index_id = id;

	/*
	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@@ -220,6 +392,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
	.attrs = hisi_l3c_pmu_v1_format_attr,
};

static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
	HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
	HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
	NULL
};

static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
	.name = "format",
	.attrs = hisi_l3c_pmu_v2_format_attr,
};

static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),
	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
@@ -242,6 +428,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
	.attrs = hisi_l3c_pmu_v1_events_attr,
};

static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
	HISI_PMU_EVENT_ATTR(l3c_hit,		0x48),
	HISI_PMU_EVENT_ATTR(cycles,		0x7f),
	HISI_PMU_EVENT_ATTR(l3c_ref,		0xb8),
	HISI_PMU_EVENT_ATTR(dat_access,		0xb9),
	NULL
};

static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
	.name = "events",
	.attrs = hisi_l3c_pmu_v2_events_attr,
};

static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);

static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
@@ -273,6 +472,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
	NULL,
};

static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
	&hisi_l3c_pmu_v2_format_group,
	&hisi_l3c_pmu_v2_events_group,
	&hisi_l3c_pmu_cpumask_attr_group,
	&hisi_l3c_pmu_identifier_group,
	NULL
};

static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
	.write_evtype		= hisi_l3c_pmu_write_evtype,
	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
@@ -286,6 +493,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
	.read_counter		= hisi_l3c_pmu_read_counter,
	.get_int_status		= hisi_l3c_pmu_get_int_status,
	.clear_int_status	= hisi_l3c_pmu_clear_int_status,
	.enable_filter		= hisi_l3c_pmu_enable_filter,
	.disable_filter		= hisi_l3c_pmu_disable_filter,
};

static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@@ -301,12 +510,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
	if (ret)
		return ret;

	l3c_pmu->num_counters = L3C_NR_COUNTERS;
	if (l3c_pmu->identifier >= HISI_PMU_V2) {
		l3c_pmu->counter_bits = 64;
		l3c_pmu->check_event = L3C_V2_NR_EVENTS;
		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
	} else {
		l3c_pmu->counter_bits = 48;
		l3c_pmu->check_event = L3C_V1_NR_EVENTS;
		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
	}

	l3c_pmu->num_counters = L3C_NR_COUNTERS;
	l3c_pmu->ops = &hisi_uncore_l3c_ops;
	l3c_pmu->dev = &pdev->dev;
	l3c_pmu->on_cpu = -1;
	l3c_pmu->check_event = L3C_V1_NR_EVENTS;

	return 0;
}
@@ -334,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
		return ret;
	}

	/*
	 * CCL_ID is used to identify the L3C in the same SCCL which was
	 * used _UID by mistake.
	 */
	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
			      l3c_pmu->sccl_id, l3c_pmu->index_id);
			      l3c_pmu->sccl_id, l3c_pmu->ccl_id);
	l3c_pmu->pmu = (struct pmu) {
		.name		= name,
		.module		= THIS_MODULE,
@@ -348,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
		.start		= hisi_uncore_pmu_start,
		.stop		= hisi_uncore_pmu_stop,
		.read		= hisi_uncore_pmu_read,
		.attr_groups	= hisi_l3c_pmu_v1_attr_groups,
		.attr_groups	= l3c_pmu->pmu_events.attr_groups,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	};

+7 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@
#include "hisi_uncore_pmu.h"

#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))

/*
 * PMU format attributes
@@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event)
	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
				    HISI_GET_EVENTID(event));

	if (hisi_pmu->ops->enable_filter)
		hisi_pmu->ops->enable_filter(event);

	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
}
@@ -259,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)

	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);

	if (hisi_pmu->ops->disable_filter)
		hisi_pmu->ops->disable_filter(event);
}

void hisi_uncore_pmu_set_event_period(struct perf_event *event)
+11 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#ifndef __HISI_UNCORE_PMU_H__
#define __HISI_UNCORE_PMU_H__

#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/kernel.h>
@@ -22,6 +23,7 @@
#undef pr_fmt
#define pr_fmt(fmt)     "hisi_pmu: " fmt

#define HISI_PMU_V2		0x30
#define HISI_MAX_COUNTERS 0x10
#define to_hisi_pmu(p)	(container_of(p, struct hisi_pmu, pmu))

@@ -35,6 +37,12 @@
#define HISI_PMU_EVENT_ATTR(_name, _config)		\
	HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)

#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo)        \
	static inline u32 hisi_get_##name(struct perf_event *event)            \
	{                                                                  \
		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
	}

struct hisi_pmu;

struct hisi_uncore_ops {
@@ -50,11 +58,14 @@ struct hisi_uncore_ops {
	void (*stop_counters)(struct hisi_pmu *);
	u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
	void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
	void (*enable_filter)(struct perf_event *event);
	void (*disable_filter)(struct perf_event *event);
};

struct hisi_pmu_hwevents {
	struct perf_event *hw_events[HISI_MAX_COUNTERS];
	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
	const struct attribute_group **attr_groups;
};

/* Generic pmu struct for different pmu types */