Commit c53edb9e authored by Qiuxu Zhuo, committed by Aichun Shi
Browse files

EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator

mainline inclusion
from mainline-v6.13-rc1
commit a36667037a0c0e36c59407f8ae636295390239a5
category: bugfix
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/IBPIMS
CVE: N/A
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a36667037a0c0e36c59407f8ae636295390239a5



-------------------------------------

The Granite Rapids CPUs with Flat2LM memory configurations may
mistakenly report near-memory errors as far-memory errors, resulting
in invalid decoded ADXL results:

  EDAC skx: Bad imc -1

Fix this incorrect far-memory error source indicator by prefetching the
decoded far-memory controller ID, and adjust the error source indicator
to near-memory if the far-memory controller ID is invalid.

Intel-SIG: commit a36667037a0c EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator
Backport to fix EDAC driver for GNR

Fixes: ba987eaa ("EDAC/i10nm: Add Intel Granite Rapids server support")
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Tested-by: Diego Garcia Rodriguez <diego.garcia.rodriguez@intel.com>
Link: https://lore.kernel.org/r/20241015072236.24543-3-qiuxu.zhuo@intel.com


[ Aichun Shi: amend commit log ]
Signed-off-by: Aichun Shi <aichun.shi@intel.com>
parent e2c0a9de
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1037,6 +1037,7 @@ static int __init i10nm_init(void)
		return -ENODEV;

	cfg = (struct res_config *)id->driver_data;
	skx_set_res_cfg(cfg);
	res_cfg = cfg;

	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
+23 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
static bool skx_mem_cfg_2lm;
static struct res_config *skx_res_cfg;

int skx_adxl_get(void)
{
@@ -135,6 +136,22 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
		return false;
	}

	/*
	 * GNR with a Flat2LM memory configuration may mistakenly classify
	 * a near-memory error(DDR5) as a far-memory error(CXL), resulting
	 * in the incorrect selection of decoded ADXL components.
	 * To address this, prefetch the decoded far-memory controller ID
	 * and adjust the error source to near-memory if the far-memory
	 * controller ID is invalid.
	 */
	if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) {
		res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
		if (res->imc == -1) {
			err_src = ERR_SRC_2LM_NM;
			edac_dbg(0, "Adjust the error source to near-memory.\n");
		}
	}

	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
	if (err_src == ERR_SRC_2LM_NM) {
		res->imc     = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
@@ -191,6 +208,12 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
}
EXPORT_SYMBOL_GPL(skx_set_mem_cfg);

/*
 * skx_set_res_cfg() - Record the platform resource configuration.
 * @cfg: resource configuration selected by the calling driver
 *       (i10nm_init() passes the entry taken from id->driver_data).
 *
 * Saves @cfg in the file-scope skx_res_cfg pointer so that
 * skx_adxl_decode() can check the CPU type (e.g. GNR) when deciding
 * whether a reported far-memory error source must be adjusted.
 *
 * Only the pointer is stored, not a copy of the structure.
 * NOTE(review): this implies @cfg must stay valid for as long as errors
 * may be decoded - confirm against the callers.
 */
void skx_set_res_cfg(struct res_config *cfg)
{
	skx_res_cfg = cfg;
}
EXPORT_SYMBOL_GPL(skx_set_res_cfg);

void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
	driver_decode = decode;
+1 −0
Original line number Diff line number Diff line
@@ -242,6 +242,7 @@ int skx_adxl_get(void);
void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);

int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);