Commit 4b5e35ce authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Tony Luck:
 "Various fixes and support for new CPUs:

   - Clean up error messages from thunderx_edac

   - Add MODULE_DEVICE_TABLE to ti_edac so it will autoload

   - Use %pR to print resources in aspeed_edac

   - Add Yazen Ghannam as MAINTAINER for AMD edac drivers

   - Fix Ice Lake and Sapphire Rapids drivers to report correct "near"
     or "far" device for errors in 2LM configurations

   - Add support of on package high bandwidth memory in Sapphire Rapids

   - New CPU support for three CPUs supporting in-band ECC (IOT SKUs for
     ICL-NNPI, Tiger Lake and Alder Lake)

   - Don't even try to load Intel EDAC drivers when running as a guest

   - Fix Kconfig dependency on X86_MCE_INTEL for EDAC_IGEN6"

* tag 'edac_updates_for_v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/igen6: fix core dependency
  EDAC/Intel: Do not load EDAC driver when running as a guest
  EDAC/igen6: Add Intel Alder Lake SoC support
  EDAC/igen6: Add Intel Tiger Lake SoC support
  EDAC/igen6: Add Intel ICL-NNPI SoC support
  EDAC/i10nm: Add support for high bandwidth memory
  EDAC/i10nm: Add detection of memory levels for ICX/SPR servers
  EDAC/skx_common: Add new ADXL components for 2-level memory
  MAINTAINERS: Make Yazen Ghannam maintainer for EDAC-AMD64
  EDAC/aspeed: Use proper format string for printing resource
  EDAC/ti: Add missing MODULE_DEVICE_TABLE
  EDAC/thunderx: Remove irrelevant variable from error messages
parents e60d726f 0a9ece9b
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -6467,10 +6467,11 @@ F: Documentation/filesystems/ecryptfs.rst
F:	fs/ecryptfs/
EDAC-AMD64
M:	Borislav Petkov <bp@alien8.de>
M:	Yazen Ghannam <yazen.ghannam@amd.com>
L:	linux-edac@vger.kernel.org
S:	Maintained
S:	Supported
F:	drivers/edac/amd64_edac*
F:	drivers/edac/mce_amd*
EDAC-ARMADA
M:	Jan Luebbe <jlu@pengutronix.de>
+2 −1
Original line number Diff line number Diff line
@@ -270,7 +270,8 @@ config EDAC_PND2

config EDAC_IGEN6
	tristate "Intel client SoC Integrated MC"
	depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
	depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
	depends on X86_64 && X86_MCE_INTEL
	help
	  Support for error detection and correction on the Intel
	  client SoC Integrated Memory Controller using In-Band ECC IP.
+2 −2
Original line number Diff line number Diff line
@@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci)
		return rc;
	}

	dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
		r.start, resource_size(&r), PAGE_SHIFT);
	dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
		&r, PAGE_SHIFT);

	csrow->first_page = r.start >> PAGE_SHIFT;
	nr_pages = resource_size(&r) >> PAGE_SHIFT;
+162 −12
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
#include "edac_module.h"
#include "skx_common.h"

#define I10NM_REVISION	"v0.0.4"
#define I10NM_REVISION	"v0.0.5"
#define EDAC_MOD_STR	"i10nm_edac"

/* Debug macros */
@@ -24,19 +24,39 @@
	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg)	\
	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
#define I10NM_GET_SAD(d, offset, i, reg)\
	pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
#define I10NM_GET_CAPID3_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j)	\
	readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4)
	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
	(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i, j)	\
	readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4)
	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
	(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCMTR(m, i)		\
	readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz)
	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i)		\
	readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz)
	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
	(i) * (m)->chan_mmio_sz)

#define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)

#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)

#define I10NM_MAX_SAD			16
#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)

static struct list_head *i10nm_edac_list;

@@ -63,7 +83,32 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
	return pdev;
}

static int i10nm_get_all_munits(void)
static bool i10nm_check_2lm(struct res_config *cfg)
{
	struct skx_dev *d;
	u32 reg;
	int i;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
						 PCI_SLOT(cfg->sad_all_devfn),
						 PCI_FUNC(cfg->sad_all_devfn));
		if (!d->sad_all)
			continue;

		for (i = 0; i < I10NM_MAX_SAD; i++) {
			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
				edac_dbg(2, "2-level memory configuration.\n");
				return true;
			}
		}
	}

	return false;
}

static int i10nm_get_ddr_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
@@ -91,7 +136,7 @@ static int i10nm_get_all_munits(void)
		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
			 j++, base, reg);

		for (i = 0; i < I10NM_NUM_IMC; i++) {
		for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
						   12 + i, 0);
			if (i == 0 && !mdev) {
@@ -127,11 +172,97 @@ static int i10nm_get_all_munits(void)
	return 0;
}

/*
 * Report whether the socket advertises HBM.
 *
 * Reads the capid3_cfg register (offset 0x90 of d->pcu_cr3) and returns
 * true when its bit field 30:27 is non-zero. A failed config read is
 * logged and reported as "no HBM".
 */
static bool i10nm_check_hbm_imc(struct skx_dev *d)
{
	u32 capid3;

	if (!I10NM_GET_CAPID3_CFG(d, capid3))
		return I10NM_IS_HBM_PRESENT(capid3) != 0;

	i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
	return false;
}

/*
 * Discover the HBM memory controllers of every socket on i10nm_edac_list
 * and map their MMIO register windows.
 *
 * HBM controllers are stored in d->imc[] behind the DDR ones, i.e. from
 * index I10NM_NUM_DDR_IMC upwards. For each controller found, the PCI
 * device is recorded in ->mdev, the ioremap()ed window in ->mbase and
 * ->hbm_mc is set.
 *
 * Returns 0 on success, -ENODEV when a required device or register cannot
 * be read, when HBM is not present, or when a mapped controller does not
 * identify itself as HBM, and -ENOMEM when ioremap() fails.
 *
 * i10nm_init() keeps going when only this HBM probe fails, so a slot that
 * fails part-way must not be left half-initialised: the failing slot's
 * mapping and device reference are released and its fields cleared before
 * returning.
 */
static int i10nm_get_hbm_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
	u32 reg, off, mcmtr;
	struct skx_dev *d;
	int i, lmc;
	u64 base;

	list_for_each_entry(d, i10nm_edac_list, list) {
		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
		if (!d->pcu_cr3)
			return -ENODEV;

		if (!i10nm_check_hbm_imc(d)) {
			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
			return -ENODEV;
		}

		if (I10NM_GET_SCK_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
			return -ENODEV;
		}
		base = I10NM_GET_SCK_MMIO_BASE(reg);

		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
			return -ENODEV;
		}
		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);

		/* HBM controllers are numbered after the DDR controllers. */
		lmc = I10NM_NUM_DDR_IMC;

		for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
			/* Four HBM controllers per PCI device, functions 1..4. */
			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
						   12 + i / 4, 1 + i % 4);
			if (i == 0 && !mdev) {
				i10nm_printk(KERN_ERR, "No hbm mc found\n");
				return -ENODEV;
			}
			if (!mdev)
				continue;

			d->imc[lmc].mdev = mdev;
			off = i * I10NM_HBM_IMC_MMIO_SIZE;

			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);

			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
			if (!mbase) {
				/* Do not leave a device without a mapping behind. */
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
					     base + off);
				return -ENOMEM;
			}

			d->imc[lmc].mbase = mbase;
			/* Must be set before the read: the MCMTR offset depends on it. */
			d->imc[lmc].hbm_mc = true;

			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
			if (!I10NM_IS_HBM_IMC(mcmtr)) {
				/* Undo everything recorded for this slot. */
				iounmap(d->imc[lmc].mbase);
				d->imc[lmc].mbase = NULL;
				d->imc[lmc].hbm_mc = false;
				pci_dev_put(d->imc[lmc].mdev);
				d->imc[lmc].mdev = NULL;

				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
				return -ENODEV;
			}

			lmc++;
		}
	}

	return 0;
}

/* Resource configuration for I10NM-type parts using busno_cfg_offset 0xcc. */
static struct res_config i10nm_cfg0 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xcc,
	/* Per-channel MMIO stride used for non-HBM memory controllers. */
	.ddr_chan_mmio_sz	= 0x4000,
	/* PCI devfn and first register offset read by i10nm_check_2lm(). */
	.sad_all_devfn		= PCI_DEVFN(29, 0),
	.sad_all_offset		= 0x108,
};

static struct res_config i10nm_cfg1 = {
@@ -139,6 +270,8 @@ static struct res_config i10nm_cfg1 = {
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xd0,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_devfn		= PCI_DEVFN(29, 0),
	.sad_all_offset		= 0x108,
};

static struct res_config spr_cfg = {
@@ -146,7 +279,10 @@ static struct res_config spr_cfg = {
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_chan_mmio_sz	= 0x8000,
	.hbm_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_devfn		= PCI_DEVFN(10, 0),
	.sad_all_offset		= 0x300,
};

static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -179,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
	struct dimm_info *dimm;
	int i, j, ndimms;

	for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
	for (i = 0; i < imc->num_channels; i++) {
		if (!imc->mbase)
			continue;

		ndimms = 0;
		amap = I10NM_GET_AMAP(imc, i);
		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
		for (j = 0; j < imc->num_dimms; j++) {
			dimm = edac_get_dimm(mci, i, j, 0);
			mtr = I10NM_GET_DIMMMTR(imc, i, j);
			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
@@ -278,6 +414,9 @@ static int __init i10nm_init(void)
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	id = x86_match_cpu(i10nm_cpuids);
	if (!id)
		return -ENODEV;
@@ -296,8 +435,11 @@ static int __init i10nm_init(void)
		return -ENODEV;
	}

	rc = i10nm_get_all_munits();
	if (rc < 0)
	skx_set_mem_cfg(i10nm_check_2lm(cfg));

	rc = i10nm_get_ddr_munits();

	if (i10nm_get_hbm_munits() && rc)
		goto fail;

	list_for_each_entry(d, i10nm_edac_list, list) {
@@ -318,7 +460,15 @@ static int __init i10nm_init(void)
			d->imc[i].lmc = i;
			d->imc[i].src_id  = src_id;
			d->imc[i].node_id = node_id;
			if (d->imc[i].hbm_mc) {
				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
				d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
				d->imc[i].num_dimms    = I10NM_NUM_HBM_DIMMS;
			} else {
				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
				d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
				d->imc[i].num_dimms    = I10NM_NUM_DDR_DIMMS;
			}

			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
					      "Intel_10nm Socket", EDAC_MOD_STR,
+349 −25
Original line number Diff line number Diff line
@@ -22,11 +22,12 @@
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>

#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION	"v2.4"
#define IGEN6_REVISION	"v2.5"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"
@@ -40,7 +41,7 @@

#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC				1 /* Max memory controllers */
#define NUM_IMC				2 /* Max memory controllers */
#define NUM_CHANNELS			2 /* Max channels */
#define NUM_DIMMS			2 /* Max DIMMs per channel */

@@ -54,6 +55,10 @@
#define CAPID_C_OFFSET			0xec
#define CAPID_C_IBECC			BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET			0xf0
#define CAPID_E_IBECC			BIT(12)

/* Error Status */
#define ERRSTS_OFFSET			0xc8
#define ERRSTS_CE			BIT_ULL(6)
@@ -70,7 +75,7 @@
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + 0x170)
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
@@ -84,39 +89,54 @@
#define MCHBAR_SIZE			0x10000

/* Parameters for the channel decode stage */
#define MAD_INTER_CHANNEL_OFFSET	0x5000
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		0x5004
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET		0x500c
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		0X5024
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		0X5028
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)

static struct res_config {
	bool machine_check;
	int num_imc;
	u32 imc_base;
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	u32 ibecc_base;
	u32 ibecc_error_log_offset;
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr);
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr);
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;

struct igen6_imc {
@@ -125,6 +145,7 @@ struct igen6_imc {
	struct pci_dev *pdev;
	struct device dev;
	void __iomem *window;
	u64 size;
	u64 ch_s_size;
	int ch_l_map;
	u64 dimm_s_size[NUM_CHANNELS];
@@ -134,6 +155,9 @@ struct igen6_imc {

static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	u64 ms_hash;
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;

/* The top of low usable DRAM */
@@ -183,6 +207,21 @@ static struct work_struct ecclog_work;
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

static bool ehl_ibecc_available(struct pci_dev *pdev)
{
	u32 v;
@@ -193,12 +232,12 @@ static bool ehl_ibecc_available(struct pci_dev *pdev)
	return !!(CAPID_C_IBECC & v);
}

static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}

static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	if (eaddr < igen6_tolud)
		return eaddr;
@@ -212,14 +251,158 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
	return eaddr;
}

/*
 * IBECC availability check used by icl_cfg.
 *
 * Returns true only when the CAPID_C register could be read, its
 * CAPID_C_IBECC bit is clear, and the boot CPU stepping is at least 1.
 */
static bool icl_ibecc_available(struct pci_dev *pdev)
{
	u32 capid_c;

	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &capid_c))
		return false;

	if (capid_c & CAPID_C_IBECC)
		return false;

	return boot_cpu_data.x86_stepping >= 1;
}

/*
 * IBECC availability check used by tgl_cfg and adl_cfg.
 *
 * Returns true when the CAPID_E register could be read and its
 * CAPID_E_IBECC bit is clear.
 */
static bool tgl_ibecc_available(struct pci_dev *pdev)
{
	u32 capid_e;

	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &capid_e))
		return false;

	return (capid_e & CAPID_E_IBECC) == 0;
}

/*
 * Convert a memory address to a system address.
 *
 * Addresses below igen6_tolud are returned unchanged. Otherwise:
 *  - if igen6_tom is at most 4GB, the part above igen6_tolud is placed
 *    at 4GB;
 *  - else, addresses below 4GB have their part above igen6_tolud placed
 *    at igen6_tom, and addresses at or above 4GB are returned unchanged.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 above_tolud;

	if (maddr < igen6_tolud)
		return maddr;

	above_tolud = maddr - igen6_tolud;

	if (igen6_tom <= _4GB)
		return above_tolud + _4GB;

	return maddr < _4GB ? above_tolud + igen6_tom : maddr;
}

/*
 * Compute the one-bit memory slice hash of @addr.
 *
 * The result is @hash_init XORed with bits 6..19 of (@addr & @mask) and
 * with bit @intlv_bit of the unmasked @addr.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 parity = hash_init;
	int bit = 6;

	while (bit < 20) {
		parity ^= (masked >> bit) & 1;
		bit++;
	}

	return parity ^ ((addr >> intlv_bit) & 1);
}

/*
 * Convert an error address logged for memory controller @mc into a memory
 * address.
 *
 * Addresses at or above igen6_pvt->ms_s_size are simply offset by that
 * size. Below it, a bit is re-inserted at the interleave position taken
 * from the cached MEM_SLICE_HASH value: the bits from that position upwards
 * are shifted left by one and the gap is filled with mem_slice_hash(),
 * seeded with @mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 slice_size = igen6_pvt->ms_s_size;
	u64 upper, lower, maddr, mask;
	int intlv_bit;
	u32 ms_hash;

	if (eaddr >= slice_size)
		return eaddr + slice_size;

	ms_hash   = igen6_pvt->ms_hash;
	mask      = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Open a one-bit hole at intlv_bit. */
	upper = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1);
	lower = GET_BITFIELD(eaddr, 0, intlv_bit - 1);
	maddr = upper | lower;

	/* Fill the hole with the slice hash bit. */
	return maddr | (mem_slice_hash(maddr, mask, mc, intlv_bit) << intlv_bit);
}

/*
 * Convert an error address logged for memory controller @mc into a system
 * address: first to a memory address, then through mem_addr_to_sys_addr().
 */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}

/*
 * err_addr_to_imc_addr callback for tgl_cfg: the logged error address is
 * returned unchanged; @mc is unused.
 */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}

/*
 * err_addr_to_sys_addr callback for adl_cfg: the logged error address is
 * passed straight to mem_addr_to_sys_addr(); @mc is unused.
 */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}

/*
 * err_addr_to_imc_addr callback for adl_cfg.
 *
 * Addresses at or above twice igen6_pvt->ms_s_size are reduced by that
 * size. Below it, the interleave bit position is read from the
 * MAD_MC_HASH register of memory controller @mc and that single bit is
 * removed from the address (higher bits move down by one).
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 slice_size = igen6_pvt->ms_s_size;
	u64 high, low;
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * slice_size)
		return eaddr - slice_size;

	mc_hash   = readl(igen6_pvt->imc[mc].window + MAD_MC_HASH_OFFSET);
	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	/* Drop bit intlv_bit and close the gap. */
	high = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit;
	low  = GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return high | low;
}

/*
 * Resource configuration for the DID_EHL_* devices: a single memory
 * controller; machine_check is left unset, so register_err_handler()
 * takes the NMI path.
 */
static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

/*
 * Resource configuration for the DID_ICL_* (ICL-NNPI) devices: a single
 * memory controller that reuses the ehl address conversion callbacks but
 * has its own IBECC base and availability check.
 */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};

/*
 * Resource configuration for the Tiger Lake device (DID_TGL_SKU): two
 * memory controllers, errors notified through the MCE decode chain.
 */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	/* Window mapped by igen6_mem_slice_setup() to read the slice hash. */
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};

/*
 * Resource configuration for the Alder Lake devices (DID_ADL_*): two
 * memory controllers, errors notified through the MCE decode chain.
 * cmf_size is left at zero, so igen6_mem_slice_setup() does not map or
 * read a slice hash register for these parts.
 */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};

static const struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
@@ -232,6 +415,15 @@ static const struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -490,8 +682,8 @@ static void ecclog_work_cb(struct work_struct *work)
		eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
			ECC_ERROR_LOG_ADDR_SHIFT;
		res.mc	     = node->mc;
		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);

		mci = igen6_pvt->imc[res.mc].mci;

@@ -540,6 +732,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
	return NMI_HANDLED;
}

/*
 * MCE decode-chain callback (see ecclog_mce_dec).
 *
 * @data points to the struct mce being decoded. Events already flagged
 * MCE_HANDLED_CEC and events that are not memory related are ignored.
 * For the rest, the MCE record is dumped at debug level and
 * ecclog_handler() is run; if it reports that something was handled, the
 * event is flagged MCE_HANDLED_EDAC.
 *
 * Always returns NOTIFY_DONE.
 */
static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *m = data;
	char *kind;

	if (m->kflags & MCE_HANDLED_CEC)
		return NOTIFY_DONE;

	/*
	 * Only memory related errors are of interest.
	 * The MCI_STATUS_ADDRV bit of MCi_STATUS is deliberately not
	 * checked, because some CPUs (e.g., Tiger Lake UP3) do not set it.
	 */
	if ((m->status & 0xefff) >> 7 != 1)
		return NOTIFY_DONE;

	kind = (m->mcgstatus & MCG_STATUS_MCIP) ? "Exception" : "Event";

	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
		 m->extcpu, kind, m->mcgstatus,
		 m->bank, m->status);
	edac_dbg(0, "TSC 0x%llx\n", m->tsc);
	edac_dbg(0, "ADDR 0x%llx\n", m->addr);
	edac_dbg(0, "MISC 0x%llx\n", m->misc);
	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
		 m->cpuvendor, m->cpuid, m->time,
		 m->socketid, m->apicid);

	/*
	 * The machine check only serves as a notification that a memory
	 * error happened. Every memory controller has its own IBECC
	 * instance, so read and clear the error information (address and
	 * type) on all of them to find out which controller(s) were hit.
	 */
	if (ecclog_handler())
		m->kflags |= MCE_HANDLED_EDAC;

	return NOTIFY_DONE;
}

/*
 * Notifier block that hooks ecclog_mce_handler() into the MCE decode
 * chain at EDAC priority; (un)registered by (un)register_err_handler().
 */
static struct notifier_block ecclog_mce_dec = {
	.notifier_call	= ecclog_mce_handler,
	.priority	= MCE_PRIO_EDAC,
};

static bool igen6_check_ecc(struct igen6_imc *imc)
{
	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
@@ -573,6 +816,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
		imc->dimm_l_map[i]  = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
		imc->size += imc->dimm_s_size[i];
		imc->size += imc->dimm_l_size[i];
		ndimms = 0;

		for (j = 0; j < NUM_DIMMS; j++) {
@@ -608,6 +853,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
		}
	}

	edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);

	return 0;
}

@@ -857,6 +1104,80 @@ static void igen6_unregister_mcis(void)
	}
}

/*
 * Record the memory slice parameters for a two-controller configuration.
 *
 * ms_s_size is set to the smaller of the two controllers' sizes and
 * ms_l_map to the index of the other controller (0 when the sizes are
 * equal). If the configuration provides a non-zero cmf_size, the window
 * at @mchbar + cmf_base is mapped temporarily and the 64-bit register at
 * ms_hash_offset is cached in igen6_pvt->ms_hash.
 *
 * Returns 0 on success or -ENODEV when the window cannot be mapped.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *imcs = igen6_pvt->imc;
	u64 cmf_phys = mchbar + res_cfg->cmf_base;
	void __iomem *cmf;
	bool mc0_smaller;

	edac_dbg(2, "\n");

	mc0_smaller = imcs[0].size < imcs[1].size;
	igen6_pvt->ms_s_size = mc0_smaller ? imcs[0].size : imcs[1].size;
	igen6_pvt->ms_l_map  = mc0_smaller ? 1 : 0;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 igen6_pvt->ms_s_size >> 20, igen6_pvt->ms_l_map);

	/* No window configured for this part: nothing more to read. */
	if (!res_cfg->cmf_size)
		return 0;

	cmf = ioremap(cmf_phys, res_cfg->cmf_size);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_phys);
		return -ENODEV;
	}

	igen6_pvt->ms_hash = readq(cmf + res_cfg->ms_hash_offset);

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", igen6_pvt->ms_hash);

	iounmap(cmf);

	return 0;
}

/*
 * Hook up the error notification path selected by res_cfg->machine_check:
 * the MCE decode chain when set, the NMI_SERR handler otherwise.
 *
 * Returns 0 on success or the error returned by register_nmi_handler().
 */
static int register_err_handler(void)
{
	int rc;

	if (!res_cfg->machine_check) {
		rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
					  0, IGEN6_NMI_NAME);
		if (rc)
			igen6_printk(KERN_ERR, "Failed to register NMI handler\n");

		return rc;
	}

	mce_register_decode_chain(&ecclog_mce_dec);

	return 0;
}

/*
 * Undo register_err_handler(): remove whichever notification path
 * res_cfg->machine_check selects.
 */
static void unregister_err_handler(void)
{
	if (res_cfg->machine_check)
		mce_unregister_decode_chain(&ecclog_mce_dec);
	else
		unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}

static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	u64 mchbar;
@@ -880,6 +1201,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
			goto fail2;
	}

	if (res_cfg->num_imc > 1) {
		rc = igen6_mem_slice_setup(mchbar);
		if (rc)
			goto fail2;
	}

	ecclog_pool = ecclog_gen_pool_create();
	if (!ecclog_pool) {
		rc = -ENOMEM;
@@ -892,12 +1219,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	/* Check if any pending errors before registering the NMI handler */
	ecclog_handler();

	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
				  0, IGEN6_NMI_NAME);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
	rc = register_err_handler();
	if (rc)
		goto fail3;
	}

	/* Enable error reporting */
	rc = errcmd_enable_error_reporting(true);
@@ -925,7 +1249,7 @@ static void igen6_remove(struct pci_dev *pdev)

	igen6_debug_teardown();
	errcmd_enable_error_reporting(false);
	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
	unregister_err_handler();
	irq_work_sync(&ecclog_irq_work);
	flush_work(&ecclog_work);
	gen_pool_destroy(ecclog_pool);
Loading