Unverified Commit bf0d4259 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!15269 [OLK-5.10]Intel: Backport to fix EDAC driver for GNR platform

Merge Pull Request from: @allen-shi 
 
This PR fixes the EDAC driver for the Intel GNR platform.

**Upstream commits from v6.13:**
a36667037a0c0e36c59407f8ae636295390239a5 EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator
2397f795735219caa9c2fe61e7bcdd0652e670d3 EDAC/skx_common: Differentiate memory error sources

https://lore.kernel.org/all/20241015072236.24543-2-qiuxu.zhuo@intel.com/
https://lore.kernel.org/all/20241015072236.24543-3-qiuxu.zhuo@intel.com/

**Upstream commits from v6.11 to fix compile warning:**
8b935823530d6fd0bea0e90a747f3ebd8cfefb0d EDAC/{skx_common,skx,i10nm}: Move the common debug code to skx_common
123b158635505c89ed0d3ef45c5845ff9030a466 EDAC, i10nm: make skx_common.o a separate module

https://lore.kernel.org/all/20240829055101.56245-1-qiuxu.zhuo@intel.com/
https://lore.kernel.org/all/20240529095132.1929397-1-arnd@kernel.org/

 **Intel-Kernel Issue** 
[#IBPIMS](https://gitee.com/openeuler/intel-kernel/issues/IBPIMS)

 **Test** 
Built and ran the kernel successfully.
The EDAC test passes on the Intel GNR platform.

 **Known Issue** 
N/A

 **Default config change** 
N/A 
 
Link:https://gitee.com/openeuler/kernel/pulls/15269

 

Reviewed-by: Jason Zeng <jason.zeng@intel.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
parents 22f72139 c53edb9e
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -58,11 +58,13 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o
layerscape_edac_mod-y			:= fsl_ddr_edac.o layerscape_edac.o
obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o

skx_edac-y				:= skx_common.o skx_base.o
obj-$(CONFIG_EDAC_SKX)			+= skx_edac.o
skx_edac_common-y			:= skx_common.o

i10nm_edac-y				:= skx_common.o i10nm_base.o
obj-$(CONFIG_EDAC_I10NM)		+= i10nm_edac.o
skx_edac-y				:= skx_base.o
obj-$(CONFIG_EDAC_SKX)			+= skx_edac.o skx_edac_common.o

i10nm_edac-y				:= i10nm_base.o
obj-$(CONFIG_EDAC_I10NM)		+= i10nm_edac.o skx_edac_common.o

obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
+3 −50
Original line number Diff line number Diff line
@@ -1012,54 +1012,6 @@ static struct notifier_block i10nm_mce_dec = {
	.priority	= MCE_PRIO_EDAC,
};

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/i10nm_test/addr.
 */
static struct dentry *i10nm_test;

/*
 * debugfs write handler: fabricate a corrected memory-read error at
 * address @val and hand it to skx_mce_check_error().
 *
 * @data: unused.
 * @val:  value written to the debugfs file; used as the error address.
 *
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, with one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);
	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

/*
 * Create the "i10nm_test" EDAC debugfs directory and its write-only
 * "addr" file. If the file cannot be created, the directory is removed
 * again and i10nm_test is reset to NULL.
 */
static void setup_i10nm_debug(void)
{
	struct dentry *dir = edac_debugfs_create_dir("i10nm_test");

	i10nm_test = dir;
	if (!dir)
		return;

	if (edac_debugfs_create_file("addr", 0200, dir, NULL, &fops_u64_wo))
		return;

	/* No "addr" file: do not leave an empty directory behind. */
	debugfs_remove(dir);
	i10nm_test = NULL;
}

/*
 * Tear down the debug interface by passing the i10nm_test dentry to
 * debugfs_remove_recursive().
 */
static void teardown_i10nm_debug(void)
{
	debugfs_remove_recursive(i10nm_test);
}
#else
static inline void setup_i10nm_debug(void) {}
static inline void teardown_i10nm_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/

static int __init i10nm_init(void)
{
	u8 mc = 0, src_id = 0, node_id = 0;
@@ -1085,6 +1037,7 @@ static int __init i10nm_init(void)
		return -ENODEV;

	cfg = (struct res_config *)id->driver_data;
	skx_set_res_cfg(cfg);
	res_cfg = cfg;

	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
@@ -1155,7 +1108,7 @@ static int __init i10nm_init(void)

	opstate_init();
	mce_register_decode_chain(&i10nm_mce_dec);
	setup_i10nm_debug();
	skx_setup_debug("i10nm_test");

	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
		skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
@@ -1183,7 +1136,7 @@ static void __exit i10nm_exit(void)
			enable_retry_rd_err_log(false);
	}

	teardown_i10nm_debug();
	skx_teardown_debug();
	mce_unregister_decode_chain(&i10nm_mce_dec);
	skx_adxl_put();
	skx_remove();
+2 −50
Original line number Diff line number Diff line
@@ -587,54 +587,6 @@ static struct notifier_block skx_mce_dec = {
	.priority	= MCE_PRIO_EDAC,
};

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/skx_test/addr.
 */
static struct dentry *skx_test;

/*
 * debugfs write handler: fabricate a corrected memory-read error at
 * address @val and hand it to skx_mce_check_error().
 *
 * @data: unused.
 * @val:  value written to the debugfs file; used as the error address.
 *
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, with one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);
	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

/*
 * Create the "skx_test" EDAC debugfs directory and its write-only
 * "addr" file. If the file cannot be created, the directory is removed
 * again and skx_test is reset to NULL.
 */
static void setup_skx_debug(void)
{
	struct dentry *dir = edac_debugfs_create_dir("skx_test");

	skx_test = dir;
	if (!dir)
		return;

	if (edac_debugfs_create_file("addr", 0200, dir, NULL, &fops_u64_wo))
		return;

	/* No "addr" file: do not leave an empty directory behind. */
	debugfs_remove(dir);
	skx_test = NULL;
}

/*
 * Tear down the debug interface by passing the skx_test dentry to
 * debugfs_remove_recursive().
 */
static void teardown_skx_debug(void)
{
	debugfs_remove_recursive(skx_test);
}
#else
static inline void setup_skx_debug(void) {}
static inline void teardown_skx_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/

/*
 * skx_init:
 *	make sure we are running on the correct cpu model
@@ -725,7 +677,7 @@ static int __init skx_init(void)
	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	setup_skx_debug();
	skx_setup_debug("skx_test");

	mce_register_decode_chain(&skx_mce_dec);

@@ -739,7 +691,7 @@ static void __exit skx_exit(void)
{
	edac_dbg(2, "\n");
	mce_unregister_decode_chain(&skx_mce_dec);
	teardown_skx_debug();
	skx_teardown_debug();
	if (nvdimm_count)
		skx_adxl_put();
	skx_remove();
+105 −20
Original line number Diff line number Diff line
@@ -47,8 +47,9 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
static bool skx_mem_cfg_2lm;
static struct res_config *skx_res_cfg;

int __init skx_adxl_get(void)
int skx_adxl_get(void)
{
	const char * const *names;
	int i, j;
@@ -110,14 +111,16 @@ int __init skx_adxl_get(void)

	return -ENODEV;
}
EXPORT_SYMBOL_GPL(skx_adxl_get);

void __exit skx_adxl_put(void)
void skx_adxl_put(void)
{
	kfree(adxl_values);
	kfree(adxl_msg);
}
EXPORT_SYMBOL_GPL(skx_adxl_put);

static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
{
	struct skx_dev *d;
	int i, len = 0;
@@ -133,8 +136,24 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
		return false;
	}

	/*
	 * GNR with a Flat2LM memory configuration may mistakenly classify
	 * a near-memory error(DDR5) as a far-memory error(CXL), resulting
	 * in the incorrect selection of decoded ADXL components.
	 * To address this, prefetch the decoded far-memory controller ID
	 * and adjust the error source to near-memory if the far-memory
	 * controller ID is invalid.
	 */
	if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) {
		res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
		if (res->imc == -1) {
			err_src = ERR_SRC_2LM_NM;
			edac_dbg(0, "Adjust the error source to near-memory.\n");
		}
	}

	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
	if (error_in_1st_level_mem) {
	if (err_src == ERR_SRC_2LM_NM) {
		res->imc     = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
			       (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
		res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
@@ -187,12 +206,20 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
{
	skx_mem_cfg_2lm = mem_cfg_2lm;
}
EXPORT_SYMBOL_GPL(skx_set_mem_cfg);

/*
 * skx_set_res_cfg - remember the caller's resource configuration.
 * @cfg: configuration pointer to store in skx_res_cfg.
 *
 * The pointer is stored as-is (not copied). skx_adxl_decode() checks
 * skx_res_cfg for NULL and inspects its ->type field.
 */
void skx_set_res_cfg(struct res_config *cfg)
{
	skx_res_cfg = cfg;
}
EXPORT_SYMBOL_GPL(skx_set_res_cfg);

/*
 * skx_set_decode - install the driver-specific callbacks.
 * @decode:         address decoder stored in driver_decode;
 *                  skx_mce_check_error() tries it before the firmware
 *                  (ADXL) decoder.
 * @show_retry_log: callback stored in skx_show_retry_rd_err_log.
 */
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{
	skx_show_retry_rd_err_log = show_retry_log;
	driver_decode = decode;
}
EXPORT_SYMBOL_GPL(skx_set_decode);

int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
{
@@ -206,6 +233,7 @@ int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
	*id = GET_BITFIELD(reg, 12, 14);
	return 0;
}
EXPORT_SYMBOL_GPL(skx_get_src_id);

int skx_get_node_id(struct skx_dev *d, u8 *id)
{
@@ -219,6 +247,7 @@ int skx_get_node_id(struct skx_dev *d, u8 *id)
	*id = GET_BITFIELD(reg, 0, 2);
	return 0;
}
EXPORT_SYMBOL_GPL(skx_get_node_id);

static int get_width(u32 mtr)
{
@@ -284,6 +313,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
		*list = &dev_edac_list;
	return ndev;
}
EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings);

int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
@@ -323,6 +353,7 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
	pci_dev_put(pdev);
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(skx_get_hi_lo);

static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
			     int minval, int maxval, const char *name)
@@ -394,6 +425,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,

	return 1;
}
EXPORT_SYMBOL_GPL(skx_get_dimm_info);

int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
			int chan, int dimmno, const char *mod_str)
@@ -442,6 +474,7 @@ int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,

	return (size == 0 || size == ~0ull) ? 0 : 1;
}
EXPORT_SYMBOL_GPL(skx_get_nvdimm_info);

int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
		     const char *ctl_name, const char *mod_str,
@@ -512,6 +545,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
	imc->mci = NULL;
	return rc;
}
EXPORT_SYMBOL_GPL(skx_register_mci);

static void skx_unregister_mci(struct skx_imc *imc)
{
@@ -625,31 +659,27 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
			     optype, skx_msg);
}

static bool skx_error_in_1st_level_mem(const struct mce *m)
static enum error_source skx_error_source(const struct mce *m)
{
	u32 errcode;
	u32 errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;

	if (!skx_mem_cfg_2lm)
		return false;

	errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
	if (errcode != MCACOD_MEM_CTL_ERR && errcode != MCACOD_EXT_MEM_ERR)
		return ERR_SRC_NOT_MEMORY;

	return errcode == MCACOD_EXT_MEM_ERR;
}

static bool skx_error_in_mem(const struct mce *m)
{
	u32 errcode;
	if (!skx_mem_cfg_2lm)
		return ERR_SRC_1LM;

	errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
	if (errcode == MCACOD_EXT_MEM_ERR)
		return ERR_SRC_2LM_NM;

	return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR);
	return ERR_SRC_2LM_FM;
}

int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			void *data)
{
	struct mce *mce = (struct mce *)data;
	enum error_source err_src;
	struct decoded_addr res;
	struct mem_ctl_info *mci;
	char *type;
@@ -657,8 +687,10 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
	if (mce->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	err_src = skx_error_source(mce);

	/* Ignore unless this is memory related with an address */
	if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV))
	if (err_src == ERR_SRC_NOT_MEMORY || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	memset(&res, 0, sizeof(res));
@@ -668,7 +700,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
	/* Try driver decoder first */
	if (!(driver_decode && driver_decode(&res))) {
		/* Then try firmware decoder (ACPI DSM methods) */
		if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
		if (!(adxl_component_count && skx_adxl_decode(&res, err_src)))
			return NOTIFY_DONE;
	}

@@ -700,6 +732,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
	mce->kflags |= MCE_HANDLED_EDAC;
	return NOTIFY_DONE;
}
EXPORT_SYMBOL_GPL(skx_mce_check_error);

void skx_remove(void)
{
@@ -737,3 +770,55 @@ void skx_remove(void)
		kfree(d);
	}
}
EXPORT_SYMBOL_GPL(skx_remove);

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/{skx,i10nm}_test/addr.
 */
static struct dentry *skx_test;

/*
 * debugfs write handler: fabricate a corrected memory-read error at
 * address @val and hand it to skx_mce_check_error().
 *
 * @data: unused.
 * @val:  value written to the debugfs "addr" file; used as the error
 *        address.
 *
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, with one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);
	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

/*
 * skx_setup_debug - create the address-decode test interface.
 * @name: name of the EDAC debugfs directory to create.
 *
 * Creates the directory and a write-only "addr" file inside it. If the
 * file cannot be created, the directory is removed again and skx_test is
 * reset to NULL.
 */
void skx_setup_debug(const char *name)
{
	struct dentry *dir = edac_debugfs_create_dir(name);

	skx_test = dir;
	if (!dir)
		return;

	if (edac_debugfs_create_file("addr", 0200, dir, NULL, &fops_u64_wo))
		return;

	/* No "addr" file: do not leave an empty directory behind. */
	debugfs_remove(dir);
	skx_test = NULL;
}
EXPORT_SYMBOL_GPL(skx_setup_debug);

/*
 * skx_teardown_debug - remove the address-decode test interface.
 *
 * skx_test is a static of the common code, which is shared by the skx and
 * i10nm drivers and may outlive either of them. Clear the pointer after
 * removal so that a later teardown without a matching skx_setup_debug()
 * does not hand an already-freed dentry to debugfs_remove_recursive().
 */
void skx_teardown_debug(void)
{
	debugfs_remove_recursive(skx_test);
	skx_test = NULL;
}
EXPORT_SYMBOL_GPL(skx_teardown_debug);
#endif /*CONFIG_EDAC_DEBUG*/

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tony Luck");
MODULE_DESCRIPTION("MC Driver for Intel server processors");
+18 −2
Original line number Diff line number Diff line
@@ -147,6 +147,13 @@ enum {
	INDEX_MAX
};

/* Origin of a reported error, as classified by skx_error_source(). */
enum error_source {
	/* Memory error while the 2LM memory configuration is not in use */
	ERR_SRC_1LM		= 0,
	/* 2LM memory configuration, near-memory error */
	ERR_SRC_2LM_NM		= 1,
	/* 2LM memory configuration, far-memory error */
	ERR_SRC_2LM_FM		= 2,
	/* The error code is not one of the memory error codes */
	ERR_SRC_NOT_MEMORY	= 3,
};

#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
@@ -231,10 +238,11 @@ typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);

int __init skx_adxl_get(void);
void __exit skx_adxl_put(void);
int skx_adxl_get(void);
void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);

int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);
@@ -260,4 +268,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,

void skx_remove(void);

#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif

#endif /* _SKX_COMM_EDAC_H */