Commit a54ca194 authored by Tan Xiaojun's avatar Tan Xiaojun Committed by Arnaldo Carvalho de Melo
Browse files

perf arm-spe: Support synthetic events



After the commit ffd3d18c ("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") has been merged, it supports to
output raw data with option "--dump-raw-trace".  However, it misses for
support synthetic events so cannot output any statistical info.

This patch is to improve the "perf report" support for ARM SPE for four
types synthetic events:

  First level cache synthetic events, including L1 data cache accessing
  and missing events;
  Last level cache synthetic events, including last level cache
  accessing and missing events;
  TLB synthetic events, including TLB accessing and missing events;
  Remote access events, which is used to account load/store operations
  caused to another socket.

Example usage:

  $ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
  $ perf report --stdio

  # Samples: 59  of event 'l1d-miss'
  # Event count (approx.): 59
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ..................................
  #
      23.73%    23.73%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
      20.34%    20.34%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] perf_event_mmap
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
       5.08%     5.08%  dd       [kernel.kallsyms]  [k] unmap_page_range
       3.39%     3.39%  dd       [kernel.kallsyms]  [k] PageHuge
       3.39%     3.39%  dd       [kernel.kallsyms]  [k] release_pages
       3.39%     3.39%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       1.69%     1.69%  dd       [kernel.kallsyms]  [k] __alloc_fd
       [...]

  # Samples: 3K of event 'l1d-access'
  # Event count (approx.): 3980
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ......................................
  #
      26.98%    26.98%  dd       [kernel.kallsyms]  [k] ret_to_user
      10.53%    10.53%  dd       [kernel.kallsyms]  [k] fsnotify
       7.51%     7.51%  dd       [kernel.kallsyms]  [k] new_sync_read
       4.57%     4.57%  dd       [kernel.kallsyms]  [k] vfs_read
       4.35%     4.35%  dd       [kernel.kallsyms]  [k] vfs_write
       3.69%     3.69%  dd       [kernel.kallsyms]  [k] __fget_light
       3.69%     3.69%  dd       [kernel.kallsyms]  [k] rw_verify_area
       3.44%     3.44%  dd       [kernel.kallsyms]  [k] security_file_permission
       2.76%     2.76%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
       2.44%     2.44%  dd       [kernel.kallsyms]  [k] ksys_write
       2.24%     2.24%  dd       [kernel.kallsyms]  [k] iov_iter_zero
       2.19%     2.19%  dd       [kernel.kallsyms]  [k] read_iter_zero
       1.81%     1.81%  dd       dd                 [.] 0x0000000000002960
       1.78%     1.78%  dd       dd                 [.] 0x0000000000002980
       [...]

  # Samples: 35  of event 'llc-miss'
  # Event count (approx.): 35
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ...........................
  #
      34.29%    34.29%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       8.57%     8.57%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
       8.57%     8.57%  dd       [kernel.kallsyms]  [k] unmap_page_range
       5.71%     5.71%  dd       [kernel.kallsyms]  [k] PageHuge
       5.71%     5.71%  dd       [kernel.kallsyms]  [k] release_pages
       5.71%     5.71%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] __queue_work
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] __radix_tree_lookup
       2.86%     2.86%  dd       [kernel.kallsyms]  [k] copy_page
       [...]

  # Samples: 2  of event 'llc-access'
  # Event count (approx.): 2
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  .............
  #
      50.00%    50.00%  dd       [kernel.kallsyms]  [k] copy_page
      50.00%    50.00%  dd       libc-2.28.so       [.] _dl_addr

  # Samples: 48  of event 'tlb-miss'
  # Event count (approx.): 48
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ..................................
  #
      20.83%    20.83%  dd       [kernel.kallsyms]  [k] perf_iterate_ctx.constprop.135
      12.50%    12.50%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      10.42%    10.42%  dd       [kernel.kallsyms]  [k] clear_page
       4.17%     4.17%  dd       [kernel.kallsyms]  [k] copy_page
       4.17%     4.17%  dd       [kernel.kallsyms]  [k] filemap_map_pages
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __alloc_fd
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __mod_memcg_state.part.70
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __queue_work
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] __rcu_read_unlock
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] d_path
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] destroy_inode
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] do_dentry_open
       [...]

  # Samples: 9K of event 'tlb-access'
  # Event count (approx.): 9573
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ......................................
  #
      25.79%    25.79%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      11.22%    11.22%  dd       [kernel.kallsyms]  [k] ret_to_user
       8.56%     8.56%  dd       [kernel.kallsyms]  [k] fsnotify
       4.06%     4.06%  dd       [kernel.kallsyms]  [k] new_sync_read
       3.67%     3.67%  dd       [kernel.kallsyms]  [k] el0_svc_common.constprop.2
       3.04%     3.04%  dd       [kernel.kallsyms]  [k] __fsnotify_parent
       2.90%     2.90%  dd       [kernel.kallsyms]  [k] vfs_write
       2.82%     2.82%  dd       [kernel.kallsyms]  [k] vfs_read
       2.52%     2.52%  dd       libc-2.28.so       [.] write
       2.26%     2.26%  dd       [kernel.kallsyms]  [k] security_file_permission
       2.08%     2.08%  dd       [kernel.kallsyms]  [k] ksys_write
       1.96%     1.96%  dd       [kernel.kallsyms]  [k] rw_verify_area
       1.95%     1.95%  dd       [kernel.kallsyms]  [k] read_iter_zero
       [...]

  # Samples: 9  of event 'branch-miss'
  # Event count (approx.): 9
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  .........................
  #
      22.22%    22.22%  dd       libc-2.28.so       [.] _dl_addr
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_clear_user
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __dentry_kill
      11.11%    11.11%  dd       [kernel.kallsyms]  [k] __efistub_memcpy
      11.11%    11.11%  dd       ld-2.28.so         [.] 0x0000000000012b7c
      11.11%    11.11%  dd       libc-2.28.so       [.] 0x000000000002a980
      11.11%    11.11%  dd       libc-2.28.so       [.] 0x0000000000083340

  # Samples: 29  of event 'remote-access'
  # Event count (approx.): 29
  #
  # Children      Self  Command  Shared Object      Symbol
  # ........  ........  .......  .................  ...........................
  #
      41.38%    41.38%  dd       [kernel.kallsyms]  [k] filemap_map_pages
      10.34%    10.34%  dd       [kernel.kallsyms]  [k] unlock_page_memcg
      10.34%    10.34%  dd       [kernel.kallsyms]  [k] unmap_page_range
       6.90%     6.90%  dd       [kernel.kallsyms]  [k] release_pages
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] PageHuge
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] __queue_work
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_add_file_rmap
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_counter_try_charge
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] page_remove_rmap
       3.45%     3.45%  dd       [kernel.kallsyms]  [k] xas_start
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000002a1c
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x0000000000008b5c
       3.45%     3.45%  dd       ld-2.28.so         [.] 0x00000000000093cc

Signed-off-by: default avatarTan Xiaojun <tanxiaojun@huawei.com>
Tested-by: default avatarJames Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org


Signed-off-by: default avatarJames Clark <james.clark@arm.com>
Signed-off-by: default avatarLeo Yan <leo.yan@linaro.org>
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 9f74d770
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
+219 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * arm_spe_decoder.c: ARM SPE support
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/zalloc.h>

#include "../auxtrace.h"
#include "../debug.h"
#include "../util.h"

#include "arm-spe-decoder.h"

#ifndef BIT
#define BIT(n)		(1UL << (n))
#endif

static u64 arm_spe_calc_ip(int index, u64 payload)
{
	u8 *addr = (u8 *)&payload;
	int ns, el;

	/* Instruction virtual address or Branch target address */
	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
		ns = addr[7] & SPE_ADDR_PKT_NS;
		el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;

		/* Fill highest byte for EL1 or EL2 (VHE) mode */
		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
			addr[7] = 0xff;
		/* Clean highest byte for other cases */
		else
			addr[7] = 0x0;

	/* Data access virtual address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {

		/* Fill highest byte if bits [48..55] is 0xff */
		if (addr[6] == 0xff)
			addr[7] = 0xff;
		/* Otherwise, cleanup tags */
		else
			addr[7] = 0x0;

	/* Data access physical address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
		/* Cleanup byte 7 */
		addr[7] = 0x0;
	} else {
		pr_err("unsupported address packet index: 0x%x\n", index);
	}

	return payload;
}

struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
{
	struct arm_spe_decoder *decoder;

	if (!params->get_trace)
		return NULL;

	decoder = zalloc(sizeof(struct arm_spe_decoder));
	if (!decoder)
		return NULL;

	decoder->get_trace = params->get_trace;
	decoder->data = params->data;

	return decoder;
}

void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
{
	free(decoder);
}

static int arm_spe_get_data(struct arm_spe_decoder *decoder)
{
	struct arm_spe_buffer buffer = { .buf = 0, };
	int ret;

	pr_debug("Getting more data\n");
	ret = decoder->get_trace(&buffer, decoder->data);
	if (ret < 0)
		return ret;

	decoder->buf = buffer.buf;
	decoder->len = buffer.len;

	if (!decoder->len)
		pr_debug("No more data\n");

	return decoder->len;
}

static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
{
	int ret;

	do {
		if (!decoder->len) {
			ret = arm_spe_get_data(decoder);

			/* Failed to read out trace data */
			if (ret <= 0)
				return ret;
		}

		ret = arm_spe_get_packet(decoder->buf, decoder->len,
					 &decoder->packet);
		if (ret <= 0) {
			/* Move forward for 1 byte */
			decoder->buf += 1;
			decoder->len -= 1;
			return -EBADMSG;
		}

		decoder->buf += ret;
		decoder->len -= ret;
	} while (decoder->packet.type == ARM_SPE_PAD);

	return 1;
}

static int arm_spe_read_record(struct arm_spe_decoder *decoder)
{
	int err;
	int idx;
	u64 payload, ip;

	memset(&decoder->record, 0x0, sizeof(decoder->record));

	while (1) {
		err = arm_spe_get_next_packet(decoder);
		if (err <= 0)
			return err;

		idx = decoder->packet.index;
		payload = decoder->packet.payload;

		switch (decoder->packet.type) {
		case ARM_SPE_TIMESTAMP:
			decoder->record.timestamp = payload;
			return 1;
		case ARM_SPE_END:
			return 1;
		case ARM_SPE_ADDRESS:
			ip = arm_spe_calc_ip(idx, payload);
			if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
				decoder->record.from_ip = ip;
			else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
				decoder->record.to_ip = ip;
			break;
		case ARM_SPE_COUNTER:
			break;
		case ARM_SPE_CONTEXT:
			break;
		case ARM_SPE_OP_TYPE:
			break;
		case ARM_SPE_EVENTS:
			if (payload & BIT(EV_L1D_REFILL))
				decoder->record.type |= ARM_SPE_L1D_MISS;

			if (payload & BIT(EV_L1D_ACCESS))
				decoder->record.type |= ARM_SPE_L1D_ACCESS;

			if (payload & BIT(EV_TLB_WALK))
				decoder->record.type |= ARM_SPE_TLB_MISS;

			if (payload & BIT(EV_TLB_ACCESS))
				decoder->record.type |= ARM_SPE_TLB_ACCESS;

			if ((idx == 1 || idx == 2 || idx == 3) &&
			    (payload & BIT(EV_LLC_MISS)))
				decoder->record.type |= ARM_SPE_LLC_MISS;

			if ((idx == 1 || idx == 2 || idx == 3) &&
			    (payload & BIT(EV_LLC_ACCESS)))
				decoder->record.type |= ARM_SPE_LLC_ACCESS;

			if ((idx == 1 || idx == 2 || idx == 3) &&
			    (payload & BIT(EV_REMOTE_ACCESS)))
				decoder->record.type |= ARM_SPE_REMOTE_ACCESS;

			if (payload & BIT(EV_MISPRED))
				decoder->record.type |= ARM_SPE_BRANCH_MISS;

			break;
		case ARM_SPE_DATA_SOURCE:
			break;
		case ARM_SPE_BAD:
			break;
		case ARM_SPE_PAD:
			break;
		default:
			pr_err("Get packet error!\n");
			return -1;
		}
	}

	return 0;
}

int arm_spe_decode(struct arm_spe_decoder *decoder)
{
	return arm_spe_read_record(decoder);
}
+82 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
 * Copyright (c) 2019-2020, Arm Ltd.
 */

#ifndef INCLUDE__ARM_SPE_DECODER_H__
#define INCLUDE__ARM_SPE_DECODER_H__

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "arm-spe-pkt-decoder.h"

enum arm_spe_events {
	EV_EXCEPTION_GEN	= 0,
	EV_RETIRED		= 1,
	EV_L1D_ACCESS		= 2,
	EV_L1D_REFILL		= 3,
	EV_TLB_ACCESS		= 4,
	EV_TLB_WALK		= 5,
	EV_NOT_TAKEN		= 6,
	EV_MISPRED		= 7,
	EV_LLC_ACCESS		= 8,
	EV_LLC_MISS		= 9,
	EV_REMOTE_ACCESS	= 10,
	EV_ALIGNMENT		= 11,
	EV_PARTIAL_PREDICATE	= 17,
	EV_EMPTY_PREDICATE	= 18,
};

enum arm_spe_sample_type {
	ARM_SPE_L1D_ACCESS	= 1 << 0,
	ARM_SPE_L1D_MISS	= 1 << 1,
	ARM_SPE_LLC_ACCESS	= 1 << 2,
	ARM_SPE_LLC_MISS	= 1 << 3,
	ARM_SPE_TLB_ACCESS	= 1 << 4,
	ARM_SPE_TLB_MISS	= 1 << 5,
	ARM_SPE_BRANCH_MISS	= 1 << 6,
	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
};

struct arm_spe_record {
	enum arm_spe_sample_type type;
	int err;
	u64 from_ip;
	u64 to_ip;
	u64 timestamp;
};

struct arm_spe_insn;

struct arm_spe_buffer {
	const unsigned char *buf;
	size_t len;
	u64 offset;
	u64 trace_nr;
};

struct arm_spe_params {
	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
	void *data;
};

struct arm_spe_decoder {
	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
	void *data;
	struct arm_spe_record record;

	const unsigned char *buf;
	size_t len;

	struct arm_spe_pkt packet;
};

struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
void arm_spe_decoder_free(struct arm_spe_decoder *decoder);

int arm_spe_decode(struct arm_spe_decoder *decoder);

#endif
+16 −0
Original line number Diff line number Diff line
@@ -15,6 +15,8 @@
#define ARM_SPE_NEED_MORE_BYTES		-1
#define ARM_SPE_BAD_PACKET		-2

#define ARM_SPE_PKT_MAX_SZ		16

enum arm_spe_pkt_type {
	ARM_SPE_BAD,
	ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
	uint64_t		payload;
};

#define SPE_ADDR_PKT_HDR_INDEX_INS		(0x0)
#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		(0x1)
#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	(0x2)
#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	(0x3)

#define SPE_ADDR_PKT_NS				BIT(7)
#define SPE_ADDR_PKT_CH				BIT(6)
#define SPE_ADDR_PKT_EL_OFFSET			(5)
#define SPE_ADDR_PKT_EL_MASK			(0x3 << SPE_ADDR_PKT_EL_OFFSET)
#define SPE_ADDR_PKT_EL0			(0)
#define SPE_ADDR_PKT_EL1			(1)
#define SPE_ADDR_PKT_EL2			(2)
#define SPE_ADDR_PKT_EL3			(3)

const char *arm_spe_pkt_name(enum arm_spe_pkt_type);

int arm_spe_get_packet(const unsigned char *buf, size_t len,
+779 −42

File changed.

Preview size limit exceeded, changes collapsed.