Commit 856ef55e authored by Dan Williams's avatar Dan Williams
Browse files

Merge branch 'for-6.4/cxl-poison' into for-6.4/cxl

Include the poison list and injection infrastructure from Alison for
v6.4.
parents 267214a2 30a8a105
Loading
Loading
Loading
Loading
+35 −0
Original line number Diff line number Diff line
What:		/sys/kernel/debug/cxl/memX/inject_poison
Date:		April, 2023
KernelVersion:	v6.4
Contact:	linux-cxl@vger.kernel.org
Description:
		(WO) When a Device Physical Address (DPA) is written to this
		attribute, the memdev driver sends an inject poison command to
		the device for the specified address. The DPA must be 64-byte
		aligned and the length of the injected poison is 64-bytes. If
		successful, the device returns poison when the address is
		accessed through the CXL.mem bus. Injecting poison adds the
		address to the device's Poison List and the error source is set
		to Injected. In addition, the device adds a poison creation
		event to its internal Informational Event log, updates the
		Event Status register, and if configured, interrupts the host.
		It is not an error to inject poison into an address that
		already has poison present and no error is returned. The
		inject_poison attribute is only visible for devices supporting
		the capability.


What:		/sys/kernel/debug/memX/clear_poison
Date:		April, 2023
KernelVersion:	v6.4
Contact:	linux-cxl@vger.kernel.org
Description:
		(WO) When a Device Physical Address (DPA) is written to this
		attribute, the memdev driver sends a clear poison command to
		the device for the specified address. Clearing poison removes
		the address from the device's Poison List and writes 0 (zero)
		for 64 bytes starting at address. It is not an error to clear
		poison from an address that does not have poison set. If the
		device cannot clear poison from the address, -ENXIO is returned.
		The clear_poison attribute is only visible for devices
		supporting the capability.
+14 −0
Original line number Diff line number Diff line
@@ -415,3 +415,17 @@ Description:
		1), and checks that the hardware accepts the commit request.
		Reading this value indicates whether the region is committed or
		not.


What:		/sys/bus/cxl/devices/memX/trigger_poison_list
Date:		April, 2023
KernelVersion:	v6.4
Contact:	linux-cxl@vger.kernel.org
Description:
		(WO) When a boolean 'true' is written to this attribute the
		memdev driver retrieves the poison list from the device. The
		list consists of addresses that are poisoned, or would result
		in poison if accessed, and the source of the poison. This
		attribute is only visible for devices supporting the
		capability. The retrieved errors are logged as kernel
		events when cxl_poison event tracing is enabled.
+11 −0
Original line number Diff line number Diff line
@@ -25,7 +25,12 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
int cxl_get_poison_by_endpoint(struct cxl_port *port);
#else
static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
{
	return 0;
}
static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
}
@@ -64,4 +69,10 @@ int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);

enum cxl_poison_trace_type {
	CXL_POISON_TRACE_LIST,
	CXL_POISON_TRACE_INJECT,
	CXL_POISON_TRACE_CLEAR,
};

#endif /* __CXL_CORE_H__ */
+143 −7
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/mutex.h>
#include <asm/unaligned.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>

@@ -61,12 +63,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
	CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0),
	CXL_CMD(INJECT_POISON, 0x8, 0, 0),
	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
	CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0),
};

/*
@@ -87,6 +84,9 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
 *
 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
 * is kept up to date with patrol notifications and error management.
 *
 * CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel
 * driver orchestration for safety.
 */
static u16 cxl_disabled_raw_commands[] = {
	CXL_MBOX_OP_ACTIVATE_FW,
@@ -95,6 +95,9 @@ static u16 cxl_disabled_raw_commands[] = {
	CXL_MBOX_OP_SET_SHUTDOWN_STATE,
	CXL_MBOX_OP_SCAN_MEDIA,
	CXL_MBOX_OP_GET_SCAN_MEDIA,
	CXL_MBOX_OP_GET_POISON,
	CXL_MBOX_OP_INJECT_POISON,
	CXL_MBOX_OP_CLEAR_POISON,
};

/*
@@ -119,6 +122,43 @@ static bool cxl_is_security_command(u16 opcode)
	return false;
}

static bool cxl_is_poison_command(u16 opcode)
{
#define CXL_MBOX_OP_POISON_CMDS 0x43

	if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
		return true;

	return false;
}

static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
				       u16 opcode)
{
	switch (opcode) {
	case CXL_MBOX_OP_GET_POISON:
		set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
		break;
	case CXL_MBOX_OP_INJECT_POISON:
		set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
		break;
	case CXL_MBOX_OP_CLEAR_POISON:
		set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
		break;
	case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
		set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
		break;
	case CXL_MBOX_OP_SCAN_MEDIA:
		set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
		break;
	case CXL_MBOX_OP_GET_SCAN_MEDIA:
		set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
		break;
	default:
		break;
	}
}

static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
	struct cxl_mem_command *c;
@@ -634,13 +674,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);

		if (!cmd) {
		if (!cmd && !cxl_is_poison_command(opcode)) {
			dev_dbg(cxlds->dev,
				"Opcode 0x%04x unsupported by driver\n", opcode);
			continue;
		}

		if (cmd)
			set_bit(cmd->info.id, cxlds->enabled_cmds);

		if (cxl_is_poison_command(opcode))
			cxl_set_poison_cmd_enabled(&cxlds->poison, opcode);

		dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
	}
}
@@ -994,6 +1039,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
	struct cxl_mbox_identify id;
	struct cxl_mbox_cmd mbox_cmd;
	u32 val;
	int rc;

	mbox_cmd = (struct cxl_mbox_cmd) {
@@ -1017,6 +1063,11 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
	cxlds->lsa_size = le32_to_cpu(id.lsa_size);
	memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));

	if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) {
		val = get_unaligned_le24(id.poison_list_max_mer);
		cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
	}

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
@@ -1107,6 +1158,91 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);

int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
		       struct cxl_region *cxlr)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_poison_out *po;
	struct cxl_mbox_poison_in pi;
	struct cxl_mbox_cmd mbox_cmd;
	int nr_records = 0;
	int rc;

	rc = mutex_lock_interruptible(&cxlds->poison.lock);
	if (rc)
		return rc;

	po = cxlds->poison.list_out;
	pi.offset = cpu_to_le64(offset);
	pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_GET_POISON,
		.size_in = sizeof(pi),
		.payload_in = &pi,
		.size_out = cxlds->payload_size,
		.payload_out = po,
		.min_out = struct_size(po, record, 0),
	};

	do {
		rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
		if (rc)
			break;

		for (int i = 0; i < le16_to_cpu(po->count); i++)
			trace_cxl_poison(cxlmd, cxlr, &po->record[i],
					 po->flags, po->overflow_ts,
					 CXL_POISON_TRACE_LIST);

		/* Protect against an uncleared _FLAG_MORE */
		nr_records = nr_records + le16_to_cpu(po->count);
		if (nr_records >= cxlds->poison.max_errors) {
			dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
				nr_records);
			break;
		}
	} while (po->flags & CXL_POISON_FLAG_MORE);

	mutex_unlock(&cxlds->poison.lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL);

static void free_poison_buf(void *buf)
{
	kvfree(buf);
}

/* Get Poison List output buffer is protected by cxlds->poison.lock */
static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds)
{
	cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL);
	if (!cxlds->poison.list_out)
		return -ENOMEM;

	return devm_add_action_or_reset(cxlds->dev, free_poison_buf,
					cxlds->poison.list_out);
}

int cxl_poison_state_init(struct cxl_dev_state *cxlds)
{
	int rc;

	if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds))
		return 0;

	rc = cxl_poison_alloc_buf(cxlds);
	if (rc) {
		clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds);
		return rc;
	}

	mutex_init(&cxlds->poison.lock);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);

struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
	struct cxl_dev_state *cxlds;
+227 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);
@@ -106,6 +107,232 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(numa_node);

static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc =  cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
			      dpa, dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
Loading