Commit 8bfc20ba, authored by Namhyung Kim, committed by Peter Zijlstra
Browse files

perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src



Kernel IBS driver wasn't using new PERF_MEM_* APIs due to some of its
limitations. Mainly:

1. mem_lvl_num doesn't allow setting multiple sources whereas old API
   allows it. Setting multiple data sources is useful because IBS on
   pre-zen4 uarch doesn't provide fine granular DataSrc details (there
   is only one such DataSrc(2h) though).
2. perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
   c2c (c2c_decode_stats()) does not use mem_lvl_num at all.

1st one can be handled using ANY_CACHE with HOPS_0. 2nd is purely perf
tool specific issue and should be fixed separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com
parent 5c6e623f
Loading
Loading
Loading
Loading
+68 −88
Original line number Diff line number Diff line
@@ -728,7 +728,37 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
	return op_data2->data_src_lo;
}

static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
/*
 * Shorthand used to build the value that is ORed into
 * union perf_mem_data_src::val. Each one wraps PERF_MEM_S():
 *
 *   L(x)    - PERF_MEM_S(LVL, x) combined with PERF_MEM_S(LVL, HIT)
 *   LN(x)   - PERF_MEM_S(LVLNUM, x)
 *   REM     - PERF_MEM_S(REMOTE, REMOTE)
 *   HOPS(x) - PERF_MEM_S(HOPS, x)
 *
 * Note that L() always includes the HIT flag; callers that must not report
 * HIT (e.g. the NA case) use PERF_MEM_S(LVL, ...) directly instead.
 */
#define	L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
#define	LN(x)		PERF_MEM_S(LVLNUM, x)
#define	REM		PERF_MEM_S(REMOTE, REMOTE)
#define	HOPS(x)		PERF_MEM_S(HOPS, x)

/*
 * DataSrc lookup table used when IBS_CAPS_ZEN4 is not set, indexed by the
 * IBS_DATA_SRC_* value returned by perf_ibs_data_src().
 *
 * Entries that are not listed are zero; perf_ibs_get_mem_lvl() treats a zero
 * entry as "no usable DataSrc" and falls back to its MAB check.
 *
 * IBS_DATA_SRC_LOC_CACHE reports both L3 and REM_CCE1 in mem_lvl and uses
 * ANY_CACHE with HOPS(0) in the new fields, since mem_lvl_num cannot carry
 * more than one source.
 */
static u64 g_data_src[8] = {
	[IBS_DATA_SRC_LOC_CACHE]	  = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
	[IBS_DATA_SRC_DRAM]		  = L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_REM_CACHE]	  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_IO]		  = L(IO) | LN(IO),
};

/*
 * RMT_NODE_BITS is a bitmask of the IBS_DATA_SRC_* values for which
 * perf_ibs_get_mem_lvl() consults the RmtNode bit (only DRAM here).
 * RMT_NODE_APPLICABLE(x) is non-zero when DataSrc value @x is in that mask.
 *
 * The argument is parenthesized so that expression arguments (e.g. "a | b")
 * are shifted as a whole, and the shift is done on an unsigned constant to
 * avoid shifting into the sign bit of an int.
 */
#define RMT_NODE_BITS			(1 << IBS_DATA_SRC_DRAM)
#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1U << (x)))

/*
 * DataSrc lookup table used when IBS_CAPS_ZEN4 is set, indexed by the
 * IBS_DATA_SRC_EXT_* value returned by perf_ibs_data_src().
 *
 * Entries that are not listed are zero; perf_ibs_get_mem_lvl() treats a zero
 * entry as "no usable DataSrc" and falls back to its MAB check.
 *
 * PMEM and EXT_MEM have no mem_lvl encoding here and are described through
 * mem_lvl_num only.
 */
static u64 g_zen4_data_src[32] = {
	[IBS_DATA_SRC_EXT_LOC_CACHE]	  = L(L3) | LN(L3),
	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
	[IBS_DATA_SRC_EXT_DRAM]		  = L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_EXT_PMEM]		  = LN(PMEM),
	[IBS_DATA_SRC_EXT_IO]		  = L(IO) | LN(IO),
	[IBS_DATA_SRC_EXT_EXT_MEM]	  = LN(CXL),
};

/*
 * ZEN4_RMT_NODE_BITS is a bitmask of the IBS_DATA_SRC_EXT_* values for which
 * perf_ibs_get_mem_lvl() consults the RmtNode bit (DRAM, PMEM and EXT_MEM).
 * ZEN4_RMT_NODE_APPLICABLE(x) is non-zero when DataSrc value @x is in that
 * mask.
 *
 * The argument is parenthesized so that expression arguments (e.g. "a | b")
 * are shifted as a whole, and the shift is done on an unsigned constant to
 * avoid shifting into the sign bit of an int for large DataSrc values.
 */
#define ZEN4_RMT_NODE_BITS		((1 << IBS_DATA_SRC_EXT_DRAM) | \
					 (1 << IBS_DATA_SRC_EXT_PMEM) | \
					 (1 << IBS_DATA_SRC_EXT_EXT_MEM))
#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1U << (x)))

static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
@@ -736,30 +766,25 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	data_src->mem_lvl = 0;
	data_src->mem_lvl_num = 0;

	/*
	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
	 * memory accesses. So, check DcUcMemAcc bit early.
	 */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
		return;
	}
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
		return L(UNC) | LN(UNC);

	/* L1 Hit */
	if (op_data3->dc_miss == 0) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		return;
	}
	if (op_data3->dc_miss == 0)
		return L(L1) | LN(L1);

	/* L2 Hit */
	if (op_data3->l2_miss == 0) {
		/* Erratum #1293 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
			return;
		}
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
			return L(L2) | LN(L2);
	}

	/*
@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;

	/* L3 Hit */
	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
			return;
		}
	} else {
		if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
					    PERF_MEM_LVL_HIT;
			return;
		}
	}
		u64 val = g_zen4_data_src[ibs_data_src];

	/* A peer cache in a near CCX */
	if (ibs_caps & IBS_CAPS_ZEN4 &&
	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
		return;
	}

	/* A peer cache in a far CCX */
	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
			return;
		}
	} else {
		if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
			return;
		}
	}
		if (!val)
			goto check_mab;

	/* DRAM */
	if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
		if (op_data2->rmt_node == 0)
			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
		return;
				val |= REM | HOPS(1);
		}

	/* PMEM */
	if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		}
		return;
	}
		return val;
	} else {
		u64 val = g_data_src[ibs_data_src];

	/* Extension Memory */
	if (ibs_caps & IBS_CAPS_ZEN4 &&
	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		}
		return;
	}
		if (!val)
			goto check_mab;

	/* IO */
	if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
		data_src->mem_lvl = PERF_MEM_LVL_IO;
		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
		if (op_data2->rmt_node) {
			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
			/* IBS doesn't provide Remote socket detail */
			data_src->mem_hops = PERF_MEM_HOPS_1;
		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}
		return;

		return val;
	}

check_mab:
@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
	 * MAB only when IBS fails to provide DataSrc.
	 */
	if (op_data3->dc_miss_no_mab_alloc) {
		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
		return;
	}
	if (op_data3->dc_miss_no_mab_alloc)
		return L(LFB) | LN(LFB);

	data_src->mem_lvl = PERF_MEM_LVL_NA;
	/* Don't set HIT with NA */
	return PERF_MEM_S(LVL, NA) | LN(NA);
}

static bool perf_ibs_cache_hit_st_valid(void)
@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
				  union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3)
{
	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	perf_ibs_get_mem_snoop(op_data2, data);
	perf_ibs_get_tlb_lvl(op_data3, data);
	perf_ibs_get_mem_lock(op_data3, data);