Commit bb511d4b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'edac_updates_for_v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull intel EDAC fixes from Tony Luck:

 - Old igen6 driver could lose pending events during initialization

 - Sapphire Rapids workstations have fewer memory controllers than their
   bigger siblings. This confused the driver.

* tag 'edac_updates_for_v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/igen6: Fix the issue of no error events
  EDAC/i10nm: Skip the absent memory controllers
parents a55b0a02 ce53ad81
Loading
Loading
Loading
Loading
+49 −5
Original line number Diff line number Diff line
@@ -658,13 +658,49 @@ static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsi
	return mdev;
}

/**
 * i10nm_imc_absent() - Check whether the memory controller @imc is absent
 *
 * @imc    : The pointer to the structure of memory controller EDAC device.
 *
 * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
 */
static bool i10nm_imc_absent(struct skx_imc *imc)
{
	u32 mcmtr;
	int i;

	switch (res_cfg->type) {
	case SPR:
		for (i = 0; i < res_cfg->ddr_chan_num; i++) {
			mcmtr = I10NM_GET_MCMTR(imc, i);
			edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
			if (mcmtr != ~0)
				return false;
		}

		/*
		 * Some workstations' absent memory controllers still
		 * appear as PCIe devices, misleading the EDAC driver.
		 * By observing that the MMIO registers of these absent
		 * memory controllers consistently hold the value of ~0.
		 *
		 * We identify a memory controller as absent by checking
		 * if its MMIO register "mcmtr" == ~0 in all its channels.
		 */
		return true;
	default:
		return false;
	}
}

static int i10nm_get_ddr_munits(void)
{
	struct pci_dev *mdev;
	void __iomem *mbase;
	unsigned long size;
	struct skx_dev *d;
	int i, j = 0;
	int i, lmc, j = 0;
	u32 reg, off;
	u64 base;

@@ -690,7 +726,7 @@ static int i10nm_get_ddr_munits(void)
		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
			 j++, base, reg);

		for (i = 0; i < res_cfg->ddr_imc_num; i++) {
		for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
			mdev = get_ddr_munit(d, i, &off, &size);

			if (i == 0 && !mdev) {
@@ -700,8 +736,6 @@ static int i10nm_get_ddr_munits(void)
			if (!mdev)
				continue;

			d->imc[i].mdev = mdev;

			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
				 i, base + off, size, reg);

@@ -712,7 +746,17 @@ static int i10nm_get_ddr_munits(void)
				return -ENODEV;
			}

			d->imc[i].mbase = mbase;
			d->imc[lmc].mbase = mbase;
			if (i10nm_imc_absent(&d->imc[lmc])) {
				pci_dev_put(mdev);
				iounmap(mbase);
				d->imc[lmc].mbase = NULL;
				edac_dbg(2, "Skip absent mc%d\n", i);
				continue;
			} else {
				d->imc[lmc].mdev = mdev;
				lmc++;
			}
		}
	}

+4 −4
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@
#include "edac_mc.h"
#include "edac_module.h"

#define IGEN6_REVISION	"v2.5"
#define IGEN6_REVISION	"v2.5.1"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"
@@ -1216,9 +1216,6 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	INIT_WORK(&ecclog_work, ecclog_work_cb);
	init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);

	/* Check if any pending errors before registering the NMI handler */
	ecclog_handler();

	rc = register_err_handler();
	if (rc)
		goto fail3;
@@ -1230,6 +1227,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		goto fail4;
	}

	/* Check if any pending errors before/during the registration of the error handler */
	ecclog_handler();

	igen6_debug_setup();
	return 0;
fail4: