cxl/pci: Add (hopeful) error handling support (2905cb52) · Commits · EulixOS / Software / Kernel

drivers/cxl/core/memdev.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -344,6 +344,7 @@ struct cxl_memdev devm_cxl_add_memdev(struct cxl_dev_state cxlds)
		* needed as this is ordered with cdev_add() publishing the device.
		*/
		cxlmd->cxlds = cxlds;
		cxlds->cxlmd = cxlmd;

		cdev = &cxlmd->cdev;
		rc = cdev_device_add(cdev, dev);

drivers/cxl/cxl.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -132,6 +132,7 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw)
		#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
		#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
		/* Offset of the capability control register; FE_MASK keeps its bits 5:0 */
		#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
		#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
		/* Offset at which the header log starts within the RAS register block */
		#define CXL_RAS_HEADER_LOG_OFFSET 0x18
		#define CXL_RAS_CAPABILITY_LENGTH 0x58

drivers/cxl/cxlmem.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -186,6 +186,7 @@ struct cxl_endpoint_dvsec_info {
		* Currently only memory devices are represented.
		*
		* @dev: The device associated with this CXL state
		* @cxlmd: The device representing the CXL.mem capabilities of @dev
		* @regs: Parsed register blocks
		* @cxl_dvsec: Offset to the PCIe device DVSEC
		* @payload_size: Size of space for payload
		@@ -218,6 +219,7 @@ struct cxl_endpoint_dvsec_info {
		*/
		struct cxl_dev_state {
		struct device *dev;
		struct cxl_memdev *cxlmd;

		struct cxl_regs regs;
		int cxl_dvsec;

drivers/cxl/pci.c

+137 −0

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@
		#include <linux/list.h>
		#include <linux/pci.h>
		#include <linux/pci-doe.h>
		#include <linux/aer.h>
		#include <linux/io.h>
		#include "cxlmem.h"
		#include "cxlpci.h"
		@@ -399,6 +400,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
		}
		}

		/*
		 * Cleanup action registered by cxl_pci_probe() through
		 * devm_add_action_or_reset(): switch PCIe error reporting back off for
		 * the PCI device passed in as the opaque action argument.
		 */
		static void disable_aer(void *pdev)
		{
			struct pci_dev *pci = pdev;

			pci_disable_pcie_error_reporting(pci);
		}

		static int cxl_pci_probe(struct pci_dev pdev, const struct pci_device_id id)
		{
		struct cxl_register_map map;
		@@ -420,6 +426,7 @@ static int cxl_pci_probe(struct pci_dev pdev, const struct pci_device_id id)
		cxlds = cxl_dev_state_create(&pdev->dev);
		if (IS_ERR(cxlds))
		return PTR_ERR(cxlds);
		pci_set_drvdata(pdev, cxlds);

		cxlds->serial = pci_get_dsn(pdev);
		cxlds->cxl_dvsec = pci_find_dvsec_capability(
		@@ -474,6 +481,14 @@ static int cxl_pci_probe(struct pci_dev pdev, const struct pci_device_id id)
		if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

		if (cxlds->regs.ras) {
		pci_enable_pcie_error_reporting(pdev);
		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
		if (rc)
		return rc;
		}
		pci_save_state(pdev);

		if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

		@@ -487,10 +502,132 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
		};
		MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

		/*
		 * CXL spec rev3.0 8.2.4.16.1
		 *
		 * Copy the RAS header log out of device registers.
		 *
		 * @cxlds: device state whose mapped RAS register block (regs.ras) is read
		 * @log:   destination buffer; must have room for CXL_HEADERLOG_SIZE bytes
		 *
		 * The log is fetched as consecutive 32-bit MMIO reads starting at
		 * regs.ras + CXL_RAS_HEADER_LOG_OFFSET. Both parameters are pointers:
		 * the body dereferences @cxlds and stores through @log, and the caller
		 * passes a pointer and an array.
		 */
		static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
		{
			void __iomem *addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
			int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);

			for (i = 0; i < log_u32_size; i++) {
				log[i] = readl(addr);
				addr += sizeof(u32);
			}
		}

		/*
		* Log the state of the RAS status registers and prepare them to log the
		* next error status. Return 1 if reset needed.
		*/
		static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
		{
		struct cxl_memdev *cxlmd = cxlds->cxlmd;
		struct device *dev = &cxlmd->dev;
		u32 hl[CXL_HEADERLOG_SIZE_U32];
		void __iomem *addr;
		u32 status;
		u32 fe;

		if (!cxlds->regs.ras)
		return false;

		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
		status = le32_to_cpu((__force __le32)readl(addr));
		if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
		return false;

		/* If multiple errors, log header points to first error from ctrl reg */
		if (hweight32(status) > 1) {
		addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
		fe = BIT(le32_to_cpu((__force __le32)readl(addr)) &
		CXL_RAS_CAP_CONTROL_FE_MASK);
		} else {
		fe = status;
		}

		header_log_copy(cxlds, hl);
		trace_cxl_aer_uncorrectable_error(dev_name(dev), status, fe, hl);
		writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);

		return true;
		}

		/*
		 * pci_error_handlers.error_detected callback.
		 *
		 * The RAS status is always reported and cleared first, then the channel
		 * state picks the response:
		 *   - normal:       unbind the memdev driver and ask for a reset if an
		 *                   uncorrectable error was logged, otherwise report
		 *                   that recovery can proceed;
		 *   - frozen:       warn, unbind the memdev driver, ask for a reset;
		 *   - perm_failure: warn and request disconnect;
		 *   - anything else falls back to asking for a reset.
		 */
		static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
							   pci_channel_state_t state)
		{
			struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
			struct device *memdev = &cxlds->cxlmd->dev;
			pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
			bool had_ue;

			/*
			 * A frozen channel means a reset is coming, which is fatal to
			 * CXL.mem operation and will likely take the system down. In case
			 * the situation turns out to be survivable, dump the RAS capability
			 * state now and bounce the memdev's driver binding.
			 */
			had_ue = cxl_report_and_clear(cxlds);

			switch (state) {
			case pci_channel_io_normal:
				if (!had_ue) {
					result = PCI_ERS_RESULT_CAN_RECOVER;
					break;
				}
				device_release_driver(memdev);
				break;
			case pci_channel_io_frozen:
				dev_warn(&pdev->dev,
					 "%s: frozen state error detected, disable CXL.mem\n",
					 dev_name(memdev));
				device_release_driver(memdev);
				break;
			case pci_channel_io_perm_failure:
				dev_warn(&pdev->dev,
					 "failure state error detected, request disconnect\n");
				result = PCI_ERS_RESULT_DISCONNECT;
				break;
			}

			return result;
		}

		/*
		 * pci_error_handlers.slot_reset callback.
		 *
		 * Restores the PCI state of @pdev and tries to re-attach a driver to
		 * the memdev. Returns PCI_ERS_RESULT_RECOVERED when device_attach()
		 * reports a positive result, PCI_ERS_RESULT_DISCONNECT otherwise.
		 */
		static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
		{
			struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
			struct device *memdev = &cxlds->cxlmd->dev;
			int attached;

			dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
				 dev_name(memdev));
			pci_restore_state(pdev);

			attached = device_attach(memdev);
			return attached > 0 ? PCI_ERS_RESULT_RECOVERED :
					      PCI_ERS_RESULT_DISCONNECT;
		}

		static void cxl_error_resume(struct pci_dev *pdev)
		{
		struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
		struct cxl_memdev *cxlmd = cxlds->cxlmd;
		struct device *dev = &cxlmd->dev;

		dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		dev->driver ? "successful" : "failed");
		}

		/*
		 * PCI error recovery callbacks for this driver; installed through
		 * cxl_pci_driver.err_handler.
		 */
		static const struct pci_error_handlers cxl_error_handlers = {
		.error_detected = cxl_error_detected,
		.slot_reset = cxl_slot_reset,
		.resume = cxl_error_resume,
		};

		static struct pci_driver cxl_pci_driver = {
		.name = KBUILD_MODNAME,
		.id_table = cxl_mem_pci_tbl,
		.probe = cxl_pci_probe,
		.err_handler = &cxl_error_handlers,
		.driver = {
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
		},