Commit 2905cb52 authored by Dan Williams's avatar Dan Williams
Browse files

cxl/pci: Add (hopeful) error handling support



Add nominal error handling that tears down CXL.mem in response to error
notifications that imply a device reset. Given some CXL.mem may be
operating as System RAM, there is a high likelihood that these error
events are fatal. However, if the system survives the notification the
expectation is that the driver behavior is equivalent to a hot-unplug
and re-plug of an endpoint.

Note that this does not change the mask values from the default. That
awaits CXL _OSC support to determine whether platform firmware is in
control of the mask registers.

Reviewed-by: default avatarJonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: default avatarDave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/166974413966.1608150.15522782911404473932.stgit@djiang5-desk3.ch.intel.com


Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 2f6e9c30
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -344,6 +344,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
+1 −0
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw)
#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
#define   CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58

+2 −0
Original line number Diff line number Diff line
@@ -186,6 +186,7 @@ struct cxl_endpoint_dvsec_info {
 * Currently only memory devices are represented.
 *
 * @dev: The device associated with this CXL state
 * @cxlmd: The device representing the CXL.mem capabilities of @dev
 * @regs: Parsed register blocks
 * @cxl_dvsec: Offset to the PCIe device DVSEC
 * @payload_size: Size of space for payload
@@ -218,6 +219,7 @@ struct cxl_endpoint_dvsec_info {
 */
struct cxl_dev_state {
	struct device *dev;
	struct cxl_memdev *cxlmd;

	struct cxl_regs regs;
	int cxl_dvsec;
+137 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
@@ -399,6 +400,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
	}
}

static void disable_aer(void *pdev)
{
	pci_disable_pcie_error_reporting(pdev);
}

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
@@ -420,6 +426,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	cxlds = cxl_dev_state_create(&pdev->dev);
	if (IS_ERR(cxlds))
		return PTR_ERR(cxlds);
	pci_set_drvdata(pdev, cxlds);

	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
@@ -474,6 +481,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	if (cxlds->regs.ras) {
		pci_enable_pcie_error_reporting(pdev);
		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
		if (rc)
			return rc;
	}
	pci_save_state(pdev);

	if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

@@ -487,10 +502,132 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

/* CXL spec rev3.0 8.2.4.16.1 */
static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
{
	void __iomem *addr;
	u32 *log_addr;
	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);

	addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
	log_addr = log;

	for (i = 0; i < log_u32_size; i++) {
		*log_addr = readl(addr);
		log_addr++;
		addr += sizeof(u32);
	}
}

/*
 * Log the state of the RAS status registers and prepare them to log the
 * next error status. Return 1 if reset needed.
 */
static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;
	u32 hl[CXL_HEADERLOG_SIZE_U32];
	void __iomem *addr;
	u32 status;
	u32 fe;

	if (!cxlds->regs.ras)
		return false;

	addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
	status = le32_to_cpu((__force __le32)readl(addr));
	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
		return false;

	/* If multiple errors, log header points to first error from ctrl reg */
	if (hweight32(status) > 1) {
		addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
		fe = BIT(le32_to_cpu((__force __le32)readl(addr)) &
				     CXL_RAS_CAP_CONTROL_FE_MASK);
	} else {
		fe = status;
	}

	header_log_copy(cxlds, hl);
	trace_cxl_aer_uncorrectable_error(dev_name(dev), status, fe, hl);
	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);

	return true;
}

static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
					   pci_channel_state_t state)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;
	bool ue;

	/*
	 * A frozen channel indicates an impending reset which is fatal to
	 * CXL.mem operation, and will likely crash the system. On the off
	 * chance the situation is recoverable dump the status of the RAS
	 * capability registers and bounce the active state of the memdev.
	 */
	ue = cxl_report_and_clear(cxlds);

	switch (state) {
	case pci_channel_io_normal:
		if (ue) {
			device_release_driver(dev);
			return PCI_ERS_RESULT_NEED_RESET;
		}
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(&pdev->dev,
			 "%s: frozen state error detected, disable CXL.mem\n",
			 dev_name(dev));
		device_release_driver(dev);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(&pdev->dev,
			 "failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
		 dev_name(dev));
	pci_restore_state(pdev);
	if (device_attach(dev) <= 0)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void cxl_error_resume(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		 dev->driver ? "successful" : "failed");
}

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected	= cxl_error_detected,
	.slot_reset	= cxl_slot_reset,
	.resume		= cxl_error_resume,
};

static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.err_handler		= &cxl_error_handlers,
	.driver	= {
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},