Commit ccadf131 authored by Davidlohr Bueso's avatar Davidlohr Bueso Committed by Dan Williams
Browse files

cxl/mbox: Add background cmd handling machinery



This adds support for handling background operations, as defined in
the CXL 3.0 spec. Commands that can take too long (over ~2 seconds)
can run in the background asynchronously (to the hardware).

The driver will deal with such commands synchronously, blocking all
other incoming commands for a specified period of time, allowing
time-slicing the command such that the caller can send incremental
requests to avoid monopolizing the driver/device. Any out of sync
(timeout) between the driver and hardware is just disregarded as
an invalid state until the next successful submission. Such timeouts
are considered a rare occurrence, either a real device problem or a
driver issue that needs to reduce the size of the background operation
to fit the timeout.

On devices where mbox interrupts are supported, this will still use
a poller that will wakeup in the specified wait intervals. The irq
handler will simply awake the blocked cmd, which is also safe vs a
task that is either waking (timing out) or already awoken. Similarly
any irq setup error during the probing falls back to polling, thus
avoids unnecessarily erroring out.

Signed-off-by: default avatarDavidlohr Bueso <dave@stgolabs.net>
Link: https://lore.kernel.org/r/20230523170927.20685-5-dave@stgolabs.net


Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 9f7a320d
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -220,7 +220,8 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
	if (rc)
		return rc;

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS)
	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS &&
	    mbox_cmd->return_code != CXL_MBOX_CMD_RC_BACKGROUND)
		return cxl_mbox_cmd_rc2errno(mbox_cmd);

	if (!out_size)
+8 −0
Original line number Diff line number Diff line
@@ -176,14 +176,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
/* CXL 2.0 8.2.8.4 Mailbox Registers */
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
#define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
#define   CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
#define   CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
#define CXLDEV_MBOX_CTRL_OFFSET 0x04
#define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
#define   CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
#define CXLDEV_MBOX_CMD_OFFSET 0x08
#define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
#define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
#define CXLDEV_MBOX_STATUS_OFFSET 0x10
#define   CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
#define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
#define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
#define   CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
#define   CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
#define   CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
#define   CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48)
#define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20

/*
+7 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#include <uapi/linux/cxl_mem.h>
#include <linux/cdev.h>
#include <linux/uuid.h>
#include <linux/rcuwait.h>
#include "cxl.h"

/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -108,6 +109,9 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
 *            variable sized output commands, it tells the exact number of bytes
 *            written.
 * @min_out: (input) internal command output payload size validation
 * @poll_count: (input) Number of timeouts to attempt.
 * @poll_interval_ms: (input) Time between mailbox background command polling
 *                    interval timeouts.
 * @return_code: (output) Error code returned from hardware.
 *
 * This is the primary mechanism used to send commands to the hardware.
@@ -123,6 +127,8 @@ struct cxl_mbox_cmd {
	size_t size_in;
	size_t size_out;
	size_t min_out;
	int poll_count;
	int poll_interval_ms;
	u16 return_code;
};

@@ -331,6 +337,7 @@ struct cxl_dev_state {
	struct cxl_event_state event;
	struct cxl_poison_state poison;

	struct rcuwait mbox_wait;
	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};

+89 −0
Original line number Diff line number Diff line
@@ -105,6 +105,26 @@ static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
					 NULL, dev_id);
}

static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
{
	u64 reg;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
}

static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
{
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;

	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
	if (cxl_mbox_background_complete(cxlds))
		rcuwait_wake_up(&cxlds->mbox_wait);

	return IRQ_HANDLED;
}

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlds: The device state to communicate with.
@@ -198,6 +218,50 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * Handle the background command in a synchronous manner.
	 *
	 * All other mailbox commands will serialize/queue on the mbox_mutex,
	 * which we currently hold. Furthermore this also guarantees that
	 * cxl_mbox_background_complete() checks are safe amongst each other,
	 * in that no new bg operation can occur in between.
	 *
	 * Background operations are timesliced in accordance with the nature
	 * of the command. In the event of timeout, the mailbox state is
	 * indeterminate until the next successful command submission and the
	 * driver can get back in sync with the hardware state.
	 */
	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
		u64 bg_status_reg;
		int i, timeout = mbox_cmd->poll_interval_ms;

		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
			mbox_cmd->opcode);

		for (i = 0; i < mbox_cmd->poll_count; i++) {
			if (rcuwait_wait_event_timeout(&cxlds->mbox_wait,
				       cxl_mbox_background_complete(cxlds),
				       TASK_UNINTERRUPTIBLE,
				       msecs_to_jiffies(timeout)) > 0)
				break;
		}

		if (!cxl_mbox_background_complete(cxlds)) {
			dev_err(dev, "timeout waiting for background (%d ms)\n",
				timeout * mbox_cmd->poll_count);
			return -ETIMEDOUT;
		}

		bg_status_reg = readq(cxlds->regs.mbox +
				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
		mbox_cmd->return_code =
			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
				  bg_status_reg);
		dev_dbg(dev,
			"Mailbox background operation (0x%04x) completed\n",
			mbox_cmd->opcode);
	}

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
@@ -292,6 +356,31 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
		cxlds->payload_size);

	rcuwait_init(&cxlds->mbox_wait);

	if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) {
		u32 ctrl;
		int irq, msgnum;
		struct pci_dev *pdev = to_pci_dev(cxlds->dev);

		msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
		irq = pci_irq_vector(pdev, msgnum);
		if (irq < 0)
			goto mbox_poll;

		if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL))
			goto mbox_poll;

		/* enable background command mbox irq support */
		ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
		ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
		writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

		return 0;
	}

mbox_poll:
	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
	return 0;
}