Commit 8f415307 authored by Jens Axboe

Merge tag 'nvme-6.2-2022-12-07' of git://git.infradead.org/nvme into for-6.2/block

Pull NVMe updates from Christoph:

"nvme updates for Linux 6.2

 - fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni)
 - use the common tagset helpers in nvme-pci driver (Christoph Hellwig)
 - cleanup the nvme-pci removal path (Christoph Hellwig)
 - use kstrtobool() instead of strtobool (Christophe JAILLET)
 - allow unprivileged passthrough of Identify Controller (Joel Granados)
 - support io stats on the mpath device (Sagi Grimberg)
 - minor nvmet cleanup (Sagi Grimberg)"

* tag 'nvme-6.2-2022-12-07' of git://git.infradead.org/nvme: (22 commits)
  nvmet: don't open-code NVME_NS_ATTR_RO enumeration
  nvme-pci: use the tagset alloc/free helpers
  nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set
  nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers
  nvme: consolidate setting the tagset flags
  nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
  nvme-pci: split out a nvme_pci_ctrl_is_dead helper
  nvme-pci: return early on ctrl state mismatch in nvme_reset_work
  nvme-pci: rename nvme_disable_io_queues
  nvme-pci: cleanup nvme_suspend_queue
  nvme-pci: remove nvme_pci_disable
  nvme-pci: remove nvme_disable_admin_queue
  nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl
  nvme: use nvme_wait_ready in nvme_shutdown_ctrl
  nvme-apple: fix controller shutdown in apple_nvme_disable
  nvme-fc: move common code into helper
  nvme-fc: avoid null pointer dereference
  nvme: allow unprivileged passthrough of Identify Controller
  nvme-multipath: support io stats on the mpath device
  nvme: introduce nvme_start_request
  ...
parents c34b7ac6 19b00e00
drivers/nvme/host/apple.c  +2 −4
@@ -763,7 +763,7 @@ static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_free_cmd;
 	}
 
-	blk_mq_start_request(req);
+	nvme_start_request(req);
 	apple_nvme_submit_cmd(q, cmnd);
 	return BLK_STS_OK;
 
@@ -829,9 +829,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 			apple_nvme_remove_cq(anv);
 		}
 
-		if (shutdown)
-			nvme_shutdown_ctrl(&anv->ctrl);
-		nvme_disable_ctrl(&anv->ctrl);
+		nvme_disable_ctrl(&anv->ctrl, shutdown);
 	}
 
 	WRITE_ONCE(anv->ioq.enabled, false);
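
Note for other callers: the conversion is mechanical. A minimal sketch, assuming a driver that embeds a struct nvme_ctrl in its own device structure (the dev wrapper below is illustrative, not taken from this diff):

	/* before: shutdown notification and disable were two calls */
	if (shutdown)
		nvme_shutdown_ctrl(&dev->ctrl);
	nvme_disable_ctrl(&dev->ctrl);

	/* after: one call; shutdown = true sets CC.SHN and waits for
	 * CSTS.SHST to report completion, shutdown = false clears CC.EN
	 * and waits for CSTS.RDY to drop
	 */
	nvme_disable_ctrl(&dev->ctrl, shutdown);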
drivers/nvme/host/core.c  +43 −55
@@ -384,6 +384,8 @@ static inline void nvme_end_req(struct request *req)
 		nvme_log_error(req);
 	nvme_end_req_zoned(req);
 	nvme_trace_bio_complete(req);
+	if (req->cmd_flags & REQ_NVME_MPATH)
+		nvme_mpath_end_request(req);
 	blk_mq_end_request(req, status);
 }
 
@@ -2250,16 +2252,17 @@ static const struct block_device_operations nvme_bdev_ops = {
 	.pr_ops		= &nvme_pr_ops,
 };
 
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val,
+		u32 timeout, const char *op)
 {
-	unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
-	u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+	unsigned long timeout_jiffies = jiffies + timeout * HZ;
+	u32 csts;
 	int ret;
 
 	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
 		if (csts == ~0)
 			return -ENODEV;
-		if ((csts & NVME_CSTS_RDY) == bit)
+		if ((csts & mask) == val)
 			break;
 
 		usleep_range(1000, 2000);
@@ -2268,7 +2271,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
 		if (time_after(jiffies, timeout_jiffies)) {
 			dev_err(ctrl->device,
 				"Device not ready; aborting %s, CSTS=0x%x\n",
-				enabled ? "initialisation" : "reset", csts);
+				op, csts);
 			return -ENODEV;
 		}
 	}
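
Taken with the nvme_disable_ctrl()/nvme_enable_ctrl() hunks below, the generalised helper now covers all three CSTS waits. A condensed view of the call patterns introduced by this series:

	/* enable: wait for CSTS.RDY to be set */
	nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY,
			(timeout + 1) / 2, "initialisation");

	/* disable/reset: wait for CSTS.RDY to clear */
	nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0,
			(NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset");

	/* shutdown: wait for CSTS.SHST to report shutdown complete */
	nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK, NVME_CSTS_SHST_CMPLT,
			ctrl->shutdown_timeout, "shutdown");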
@@ -2276,27 +2279,29 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
 	return ret;
 }
 
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit.  The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
 {
 	int ret;
 
 	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
-	ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+	if (shutdown)
+		ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+	else
+		ctrl->ctrl_config &= ~NVME_CC_ENABLE;
 
 	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
 	if (ret)
 		return ret;
 
+	if (shutdown) {
+		return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
+				       NVME_CSTS_SHST_CMPLT,
+				       ctrl->shutdown_timeout, "shutdown");
+	}
 	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
 		msleep(NVME_QUIRK_DELAY_AMOUNT);
-
-	return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
+	return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0,
+			       (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset");
 }
 EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
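
One unit change worth calling out: nvme_wait_ready() now takes its timeout in seconds, while CAP.TO is in 500 ms units, hence the (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2 at the call sites. Worked example:

	/* CAP.TO = 30, i.e. 15 s:
	 * old: timeout_jiffies = ((30 + 1) * HZ / 2) + jiffies  -> ~15.5 s
	 * new: (30 + 1) / 2 = 15; timeout_jiffies = jiffies + 15 * HZ
	 * the half second lost to integer division is just slack
	 */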

@@ -2361,41 +2366,11 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
 	if (ret)
 		return ret;
-	return nvme_wait_ready(ctrl, timeout, true);
+	return nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY,
+			       (timeout + 1) / 2, "initialisation");
 }
 EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
-int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
-{
-	unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
-	u32 csts;
-	int ret;
-
-	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
-	ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
-
-	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
-	if (ret)
-		return ret;
-
-	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
-		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
-			break;
-
-		msleep(100);
-		if (fatal_signal_pending(current))
-			return -EINTR;
-		if (time_after(jiffies, timeout)) {
-			dev_err(ctrl->device,
-				"Device shutdown incomplete; abort shutdown\n");
-			return -ENODEV;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
-
 static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
 {
 	__le64 ts;
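
Callers of the removed function convert one-for-one, with the behaviour (poll CSTS.SHST, bounded by ctrl->shutdown_timeout seconds) preserved by the shutdown branch added to nvme_disable_ctrl() above:

-	nvme_shutdown_ctrl(ctrl);
+	nvme_disable_ctrl(ctrl, true);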
@@ -4856,8 +4831,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
-		const struct blk_mq_ops *ops, unsigned int flags,
-		unsigned int cmd_size)
+		const struct blk_mq_ops *ops, unsigned int cmd_size)
 {
 	int ret;
 
@@ -4867,7 +4841,9 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
 	if (ctrl->ops->flags & NVME_F_FABRICS)
 		set->reserved_tags = NVMF_RESERVED_TAGS;
 	set->numa_node = ctrl->numa_node;
-	set->flags = flags;
+	set->flags = BLK_MQ_F_NO_SCHED;
+	if (ctrl->ops->flags & NVME_F_BLOCKING)
+		set->flags |= BLK_MQ_F_BLOCKING;
 	set->cmd_size = cmd_size;
 	set->driver_data = ctrl;
 	set->nr_hw_queues = 1;
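
Transports no longer pass blk-mq flags in; the one transport-specific bit is now derived from the ops. A sketch, assuming a transport that must submit from process context (my_ctrl_ops is a placeholder name, not from this diff):

	static const struct nvme_ctrl_ops my_ctrl_ops = {
		.name	= "mytransport",
		.flags	= NVME_F_FABRICS | NVME_F_BLOCKING,
		/* ... */
	};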
@@ -4915,7 +4891,7 @@ void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl)
 EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set);
 
 int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
-		const struct blk_mq_ops *ops, unsigned int flags,
+		const struct blk_mq_ops *ops, unsigned int nr_maps,
 		unsigned int cmd_size)
 {
 	int ret;
@@ -4923,15 +4899,23 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
 	memset(set, 0, sizeof(*set));
 	set->ops = ops;
 	set->queue_depth = ctrl->sqsize + 1;
-	set->reserved_tags = NVMF_RESERVED_TAGS;
+	/*
+	 * Some Apple controllers require tags to be unique across admin and
+	 * the (only) I/O queue, so reserve the first 32 tags of the I/O queue.
+	 */
+	if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
+		set->reserved_tags = NVME_AQ_DEPTH;
+	else if (ctrl->ops->flags & NVME_F_FABRICS)
+		set->reserved_tags = NVMF_RESERVED_TAGS;
 	set->numa_node = ctrl->numa_node;
-	set->flags = flags;
+	set->flags = BLK_MQ_F_SHOULD_MERGE;
+	if (ctrl->ops->flags & NVME_F_BLOCKING)
+		set->flags |= BLK_MQ_F_BLOCKING;
 	set->cmd_size = cmd_size,
 	set->driver_data = ctrl;
 	set->nr_hw_queues = ctrl->queue_count - 1;
 	set->timeout = NVME_IO_TIMEOUT;
-	if (ops->map_queues)
-		set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+	set->nr_maps = nr_maps;
 	ret = blk_mq_alloc_tag_set(set);
 	if (ret)
 		return ret;
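
With nr_maps explicit, each transport states how many queue maps it uses instead of the core inferring it from ops->map_queues. A hedged sketch of the two common call shapes (my_mq_ops and cmd_size are placeholders):

	/* single-map transport, e.g. FC below: */
	ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
			&my_mq_ops, 1, cmd_size);

	/* fabrics transport with read and optional poll maps: */
	ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
			&my_mq_ops,
			ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
			cmd_size);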
@@ -5215,6 +5199,8 @@ EXPORT_SYMBOL_GPL(nvme_start_freeze);

 void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl)
 {
+	if (!ctrl->tagset)
+		return;
 	if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
 		blk_mq_quiesce_tagset(ctrl->tagset);
 	else
@@ -5224,6 +5210,8 @@ EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues);

 void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl)
 {
+	if (!ctrl->tagset)
+		return;
 	if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
 		blk_mq_unquiesce_tagset(ctrl->tagset);
 }
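
The early return lets teardown and error paths call the pair unconditionally, even on controllers that never allocated an I/O tag set, while the NVME_CTRL_STOPPED bit keeps the calls idempotent:

	nvme_quiesce_io_queues(ctrl);	/* no-op without a tagset */
	/* ... reset or tear down the I/O queues ... */
	nvme_unquiesce_io_queues(ctrl);	/* unquiesces only if stopped above */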
drivers/nvme/host/fc.c  +21 −9
@@ -1701,6 +1701,15 @@ nvme_fc_handle_ls_rqst_work(struct work_struct *work)
 	spin_unlock_irqrestore(&rport->lock, flags);
 }
 
+static
+void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport,
+				struct fcnvme_ls_rqst_w0 *w0)
+{
+	dev_info(lport->dev, "RCV %s LS failed: No memory\n",
+		(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+			nvmefc_ls_names[w0->ls_cmd] : "");
+}
+
 /**
  * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
  *                       upon the reception of a NVME LS request.
@@ -1754,13 +1763,16 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
 	}
 
 	lsop = kzalloc(sizeof(*lsop), GFP_KERNEL);
+	if (!lsop) {
+		nvme_fc_rcv_ls_req_err_msg(lport, w0);
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
 	lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL);
 	lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL);
-	if (!lsop || !lsop->rqstbuf || !lsop->rspbuf) {
-		dev_info(lport->dev,
-			"RCV %s LS failed: No memory\n",
-			(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
-				nvmefc_ls_names[w0->ls_cmd] : "");
+	if (!lsop->rqstbuf || !lsop->rspbuf) {
+		nvme_fc_rcv_ls_req_err_msg(lport, w0);
 		ret = -ENOMEM;
 		goto out_free;
 	}
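
The null-pointer fix is one of ordering: the old code assigned lsop->rqstbuf before lsop itself had been checked, so a failed lsop allocation crashed on that assignment rather than ever reaching the || chain. The reworked flow checks each allocation as soon as it is made; in outline (labels as in the function, comments describe the apparent intent):

	lsop = kzalloc(sizeof(*lsop), GFP_KERNEL);
	if (!lsop)
		goto out_put;	/* nothing allocated yet */
	lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL);
	lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL);
	if (!lsop->rqstbuf || !lsop->rspbuf)
		goto out_free;	/* frees lsop and whichever buffers exist */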
@@ -2506,7 +2518,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 	 * Other transports, which don't have link-level contexts bound
 	 * to sqe's, would try to gracefully shutdown the controller by
 	 * writing the registers for shutdown and polling (call
-	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
+	 * nvme_disable_ctrl()). Given a bunch of i/o was potentially
 	 * just aborted and we will wait on those contexts, and given
 	 * there was no indication of how live the controller is on the
 	 * link, don't send more io to create more contexts for the
@@ -2733,7 +2745,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	atomic_set(&op->state, FCPOP_STATE_ACTIVE);
 
 	if (!(op->flags & FCOP_FLAGS_AEN))
-		blk_mq_start_request(op->rq);
+		nvme_start_request(op->rq);
 
 	cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
 	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
@@ -2904,7 +2916,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	nvme_fc_init_io_queues(ctrl);
 
 	ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
-			&nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE,
+			&nvme_fc_mq_ops, 1,
 			struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
 				    ctrl->lport->ops->fcprqst_priv_sz));
 	if (ret)
@@ -3510,7 +3522,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	nvme_fc_init_queue(ctrl, 0);
 
 	ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
-			&nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED,
+			&nvme_fc_admin_mq_ops,
 			struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
 				    ctrl->lport->ops->fcprqst_priv_sz));
 	if (ret)
drivers/nvme/host/ioctl.c  +2 −0
@@ -34,6 +34,8 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 			case NVME_ID_CNS_NS:
 			case NVME_ID_CNS_CS_NS:
 			case NVME_ID_CNS_NS_CS_INDEP:
+			case NVME_ID_CNS_CS_CTRL:
+			case NVME_ID_CNS_CTRL:
 				return true;
 			}
 		}
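
With NVME_ID_CNS_CTRL (0x01) and NVME_ID_CNS_CS_CTRL (0x06) on the allow list, an unprivileged user with read access to the device node can fetch Identify Controller through the admin passthrough ioctl. A userspace sketch using the standard NVMe ioctl API (not part of this diff):

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/nvme_ioctl.h>

	static int identify_ctrl(int fd, void *buf)
	{
		struct nvme_admin_cmd cmd = {
			.opcode   = 0x06,			/* Identify */
			.addr     = (uint64_t)(uintptr_t)buf,	/* 4 KiB buffer */
			.data_len = 4096,
			.cdw10    = 0x01,	/* CNS 0x01: Identify Controller */
		};
		return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
	}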
drivers/nvme/host/multipath.c  +26 −0
@@ -114,6 +114,31 @@ void nvme_failover_req(struct request *req)
 	kblockd_schedule_work(&ns->head->requeue_work);
 }
 
+void nvme_mpath_start_request(struct request *rq)
+{
+	struct nvme_ns *ns = rq->q->queuedata;
+	struct gendisk *disk = ns->head->disk;
+
+	if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
+		return;
+
+	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
+	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
+					blk_rq_bytes(rq) >> SECTOR_SHIFT,
+					req_op(rq), jiffies);
+}
+EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
+
+void nvme_mpath_end_request(struct request *rq)
+{
+	struct nvme_ns *ns = rq->q->queuedata;
+
+	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
+		return;
+	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
+		nvme_req(rq)->start_time);
+}
+
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
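
The submission-side hook lands via nvme_start_request(), seen replacing blk_mq_start_request() in the apple.c and fc.c hunks above. Its body is not part of this file's diff; judging from the call sites and the REQ_NVME_MPATH check added to nvme_end_req(), it amounts to a sketch like:

	static inline void nvme_start_request(struct request *rq)
	{
		if (rq->cmd_flags & REQ_NVME_MPATH)
			nvme_mpath_start_request(rq);	/* account to the mpath disk */
		blk_mq_start_request(rq);
	}

The manual accounting is needed because the multipath gendisk is bio-based and never passes through blk-mq's own I/O statistics; bdev_start_io_acct()/bdev_end_io_acct() charge the I/O to head->disk->part0 explicitly.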
@@ -503,6 +528,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)

 	blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
 	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
+	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue);
 	/*
 	 * This assumes all controllers that refer to a namespace either
 	 * support poll queues or not.  That is not a strict guarantee,