Commit cae1d759 authored by Kajol Jain's avatar Kajol Jain Committed by Peter Zijlstra
Browse files

tools/perf: Add mem_hops field in perf_mem_data_src structure



Going forward, future generation systems can have more hierarchy
within the node/package level but currently we don't have any data source
encoding field in perf, which can be used to represent this level of data.

Add a new field called 'mem_hops' in the perf_mem_data_src structure
which can be used to represent intra-node/package or inter-node/off-package
details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value
can be used to present different hop levels data.

Also add corresponding macros to define mem_hop field values
and shift value.

Currently we define macro for HOPS_0 which corresponds
to data coming from another core but same node.

Add functionality to represent mem_hop field data in
perf_mem__lvl_scnprintf function with the help of added string
array called mem_hops.

For ex: Encodings for mem_hops fields with L2 cache:

L2                      - local L2
L2 | REMOTE | HOPS_0    - remote core, same node L2

Since with the addition of HOPS field, now remote can be used to
denote cache access from the same node but different core, a check
is added in the c2c_decode_stats function to set mrem only when HOPS
is zero along with set remote field.

Signed-off-by: default avatarKajol Jain <kjain@linux.ibm.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211006140654.298352-4-kjain@linux.ibm.com
parent fec9cc61
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -1210,14 +1210,16 @@ union perf_mem_data_src {
			mem_remote:1,   /* remote */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_blk:3,	/* access blocked */
			mem_rsvd:21;
			mem_hops:3,	/* hop level */
			mem_rsvd:18;
	};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
	__u64 val;
	struct {
		__u64	mem_rsvd:21,
		__u64	mem_rsvd:18,
			mem_hops:3,	/* hop level */
			mem_blk:3,	/* access blocked */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_remote:1,   /* remote */
@@ -1313,6 +1315,11 @@ union perf_mem_data_src {
#define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
#define PERF_MEM_BLK_SHIFT	40

/* hop level */
#define PERF_MEM_HOPS_0		0x01 /* remote core, same node */
/* 2-7 available */
#define PERF_MEM_HOPS_SHIFT	43

#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

+18 −1
Original line number Diff line number Diff line
@@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, 'Remote' will be added to represent
	 * 'Remote core, same node' accesses as remote field need
	 * to be set with mem_hops field.
	 */
	"core, same node",
};

int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
@@ -325,6 +335,9 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
		l += 7;
	}

	if (mem_info && mem_info->data_src.mem_hops)
		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);

	printed = 0;
	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
		if (!(m & 0x1))
@@ -471,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
	/*
	 * Skylake might report unknown remote level via this
	 * bit, consider it when evaluating remote HITMs.
	 *
	 * Incase of power, remote field can also be used to denote cache
	 * accesses from the another core of same node. Hence, setting
	 * mrem only when HOPS is zero along with set remote field.
	 */
	bool mrem  = data_src->mem_remote;
	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\