Commit 6ed05a8e authored by Kajol Jain, committed by Michael Ellerman
Browse files

powerpc/perf: Add data source encodings for power10 platform



The code represents memory/cache level data based on the PERF_MEM_LVL_*
namespace, which is in the process of deprecation in favour of the
newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
Add data source encodings to represent cache/memory data based on
the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.

Add data source encodings to represent data coming from local
memory/remote memory/distant memory and remote/distant cache hits.

In order to represent data coming from OpenCAPI cache/memory, we use the
LVLNUM "PMEM" field, which is used to represent persistent memory accesses.

Result on a power10 system with the patch changes applied:

localhost:# ./perf mem report --sort="mem,sym,dso" --stdio
 # Overhead       Samples  Memory access             Symbol                      Shared Object
 # ........  ............  ........................  ..........................  ................
 #
    29.46%          2331  L1 or L1 hit              [.] __random                                     libc-2.28.so
    23.11%          2121  L1 or L1 hit              [.] producer_populate_cache                      producer_consumer
    18.56%          1758  L1 or L1 hit              [.] __random_r                                   libc-2.28.so
    15.64%          1559  L2 or L2 hit              [.] __random                                     libc-2.28.so
    .....
    0.09%              5  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
    0.07%              4  Remote socket, same board Any cache hit             [.] __random         libc-2.28.so
    .....

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211206091749.87585-5-kjain@linux.ibm.com
parent 4a20ee10
Loading
Loading
Loading
Loading
+42 −12
Original line number Diff line number Diff line
@@ -229,6 +229,20 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
		ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
		break;
	case 4:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			ret = P(SNOOP, HIT);

			if (sub_idx == 1)
				ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
			else if (sub_idx == 2 || sub_idx == 3)
				ret |= P(LVL, HIT) | LEVEL(PMEM);
			else if (sub_idx == 4)
				ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
			else if (sub_idx == 5 || sub_idx == 7)
				ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
			else if (sub_idx == 6)
				ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
		} else {
			if (sub_idx <= 1)
				ret = PH(LVL, LOC_RAM);
			else if (sub_idx > 1 && sub_idx <= 2)
@@ -236,6 +250,7 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
			else
				ret = PH(LVL, REM_RAM2);
			ret |= P(SNOOP, HIT);
		}
		break;
	case 5:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
		}
		break;
	case 6:
		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
			if (sub_idx == 0)
				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HIT) | P(HOPS, 2);
			else if (sub_idx == 1)
				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HITM) | P(HOPS, 2);
			else if (sub_idx == 2)
				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HIT) | P(HOPS, 3);
			else if (sub_idx == 3)
				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
					P(SNOOP, HITM) | P(HOPS, 3);
		} else {
			ret = PH(LVL, REM_CCE2);
		if ((sub_idx == 0) || (sub_idx == 2))
			if (sub_idx == 0 || sub_idx == 2)
				ret |= P(SNOOP, HIT);
		else if ((sub_idx == 1) || (sub_idx == 3))
			else if (sub_idx == 1 || sub_idx == 3)
				ret |= P(SNOOP, HITM);
		}
		break;
	case 7:
		ret = PM(LVL, L1);