Commit 9c38475c authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-5.13-2021-05-05' of git://git.infradead.org/nvme into block-5.13

Pull NVMe fixes from Christoph:

"nvme updates for Linux 5.13

 - reset the bdev to ns head when failover (Daniel Wagner)
 - remove unsupported command noise (Keith Busch)
 - misc passthrough improvements (Kanchan Joshi)
 - fix controller ioctl through ns_head (Minwoo Im)
 - fix controller timeouts during reset (Tao Chiu)"

* tag 'nvme-5.13-2021-05-05' of git://git.infradead.org/nvme:
  nvmet: remove unsupported command noise
  nvme-multipath: reset bdev to ns head when failover
  nvme-pci: fix controller reset hang when racing with nvme_timeout
  nvme: move the fabrics queue ready check routines to core
  nvme: avoid memset for passthrough requests
  nvme: add nvme_get_ns helper
  nvme: fix controller ioctl through ns_head
parents cd2c7545 4a203425
Loading
Loading
Loading
Loading
+70 −28
Original line number Diff line number Diff line
@@ -576,6 +576,11 @@ static void nvme_free_ns(struct kref *kref)
	kfree(ns);
}

static inline bool nvme_get_ns(struct nvme_ns *ns)
{
	return kref_get_unless_zero(&ns->kref);
}

void nvme_put_ns(struct nvme_ns *ns)
{
	kref_put(&ns->kref, nvme_free_ns);
@@ -584,9 +589,6 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);

static inline void nvme_clear_nvme_request(struct request *req)
{
	struct nvme_command *cmd = nvme_req(req)->cmd;

	memset(cmd, 0, sizeof(*cmd));
	nvme_req(req)->retries = 0;
	nvme_req(req)->flags = 0;
	req->rq_flags |= RQF_DONTPREP;
@@ -637,6 +639,66 @@ static struct request *nvme_alloc_request_qid(struct request_queue *q,
	return req;
}

/*
 * For something we're not in a state to send to the device the default action
 * is to busy it and retry it after the controller state is recovered.  However,
 * if the controller is deleting or if anything is marked for failfast or
 * nvme multipath it is immediately failed.
 *
 * Note: commands used to initialize the controller will be marked for failfast.
 * Note: nvme cli/ioctl commands are marked for failfast.
 */
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
		struct request *rq)
{
	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
	    ctrl->state != NVME_CTRL_DEAD &&
	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
	return nvme_host_path_error(rq);
}
EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);

bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	struct nvme_request *req = nvme_req(rq);

	/*
	 * currently we have a problem sending passthru commands
	 * on the admin_q if the controller is not LIVE because we can't
	 * make sure that they are going out after the admin connect,
	 * controller enable and/or other commands in the initialization
	 * sequence. until the controller will be LIVE, fail with
	 * BLK_STS_RESOURCE so that they will be rescheduled.
	 */
	if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
		return false;

	if (ctrl->ops->flags & NVME_F_FABRICS) {
		/*
		 * Only allow commands on a live queue, except for the connect
		 * command, which is require to set the queue live in the
		 * appropinquate states.
		 */
		switch (ctrl->state) {
		case NVME_CTRL_CONNECTING:
			if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
			    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
				return true;
			break;
		default:
			break;
		case NVME_CTRL_DEAD:
			return false;
		}
	}

	return queue_live;
}
EXPORT_SYMBOL_GPL(__nvme_check_ready);

static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
{
	struct nvme_command c;
@@ -898,8 +960,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
	struct nvme_command *cmd = nvme_req(req)->cmd;
	blk_status_t ret = BLK_STS_OK;

	if (!(req->rq_flags & RQF_DONTPREP))
	if (!(req->rq_flags & RQF_DONTPREP)) {
		nvme_clear_nvme_request(req);
		memset(cmd, 0, sizeof(*cmd));
	}

	switch (req_op(req)) {
	case REQ_OP_DRV_IN:
@@ -1494,7 +1558,7 @@ static int nvme_ns_open(struct nvme_ns *ns)
	/* should never be called due to GENHD_FL_HIDDEN */
	if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head)))
		goto fail;
	if (!kref_get_unless_zero(&ns->kref))
	if (!nvme_get_ns(ns))
		goto fail;
	if (!try_module_get(ns->ctrl->ops->module))
		goto fail_put_ns;
@@ -1999,28 +2063,6 @@ static const struct block_device_operations nvme_bdev_ops = {
	.pr_ops		= &nvme_pr_ops,
};

#ifdef CONFIG_NVME_MULTIPATH
struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys)
{
	struct nvme_ctrl *ctrl;
	int ret;

	ret = mutex_lock_killable(&nvme_subsystems_lock);
	if (ret)
		return ERR_PTR(ret);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->state == NVME_CTRL_LIVE)
			goto found;
	}
	mutex_unlock(&nvme_subsystems_lock);
	return ERR_PTR(-EWOULDBLOCK);
found:
	nvme_get_ctrl(ctrl);
	mutex_unlock(&nvme_subsystems_lock);
	return ctrl;
}
#endif /* CONFIG_NVME_MULTIPATH */

static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
	unsigned long timeout =
@@ -3604,7 +3646,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->ns_id == nsid) {
			if (!kref_get_unless_zero(&ns->kref))
			if (!nvme_get_ns(ns))
				continue;
			ret = ns;
			break;
+0 −57
Original line number Diff line number Diff line
@@ -533,63 +533,6 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
	return NULL;
}

/*
 * For something we're not in a state to send to the device the default action
 * is to busy it and retry it after the controller state is recovered.  However,
 * if the controller is deleting or if anything is marked for failfast or
 * nvme multipath it is immediately failed.
 *
 * Note: commands used to initialize the controller will be marked for failfast.
 * Note: nvme cli/ioctl commands are marked for failfast.
 */
blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
		struct request *rq)
{
	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
	    ctrl->state != NVME_CTRL_DEAD &&
	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
	return nvme_host_path_error(rq);
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);

bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	struct nvme_request *req = nvme_req(rq);

	/*
	 * currently we have a problem sending passthru commands
	 * on the admin_q if the controller is not LIVE because we can't
	 * make sure that they are going out after the admin connect,
	 * controller enable and/or other commands in the initialization
	 * sequence. until the controller will be LIVE, fail with
	 * BLK_STS_RESOURCE so that they will be rescheduled.
	 */
	if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
		return false;

	/*
	 * Only allow commands on a live queue, except for the connect command,
	 * which is require to set the queue live in the appropinquate states.
	 */
	switch (ctrl->state) {
	case NVME_CTRL_CONNECTING:
		if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
			return true;
		break;
	default:
		break;
	case NVME_CTRL_DEAD:
		return false;
	}

	return queue_live;
}
EXPORT_SYMBOL_GPL(__nvmf_check_ready);

static const match_table_t opt_tokens = {
	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
	{ NVMF_OPT_TRADDR,		"traddr=%s"		},
+0 −13
Original line number Diff line number Diff line
@@ -184,20 +184,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
		struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
		struct nvmf_ctrl_options *opts);

static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
	if (likely(ctrl->state == NVME_CTRL_LIVE ||
		   ctrl->state == NVME_CTRL_DELETING))
		return true;
	return __nvmf_check_ready(ctrl, rq, queue_live);
}

#endif /* _NVME_FABRICS_H */
+2 −2
Original line number Diff line number Diff line
@@ -2766,8 +2766,8 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
	blk_status_t ret;

	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
	    !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
	    !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);

	ret = nvme_setup_cmd(ns, rq);
	if (ret)
+41 −24
Original line number Diff line number Diff line
@@ -370,41 +370,45 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}

#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
		unsigned int cmd, void __user *argp)
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, struct nvme_ns_head *head, int srcu_idx)
{
	struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys);
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	if (IS_ERR(ctrl))
		return PTR_ERR(ctrl);
	ret = nvme_ctrl_ioctl(ctrl, cmd, argp);
	nvme_put_ctrl(ctrl);
	return ret;
}
	nvme_get_ctrl(ns->ctrl);
	nvme_put_ns_from_disk(head, srcu_idx);
	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);

static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
		unsigned int cmd, void __user *argp)
{
	int srcu_idx = srcu_read_lock(&head->srcu);
	struct nvme_ns *ns = nvme_find_path(head);
	int ret = -EWOULDBLOCK;

	if (ns)
		ret = nvme_ns_ioctl(ns, cmd, argp);
	srcu_read_unlock(&head->srcu, srcu_idx);
	nvme_put_ctrl(ctrl);
	return ret;
}

int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns_head *head = bdev->bd_disk->private_data;
	struct nvme_ns_head *head = NULL;
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret;

	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
	if (unlikely(!ns))
		return -EWOULDBLOCK;

	/*
	 * Handle ioctls that apply to the controller instead of the namespace
	 * seperately and drop the ns SRCU reference early.  This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
	return nvme_ns_head_ns_ioctl(head, cmd, argp);
		ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
	else {
		ret = nvme_ns_ioctl(ns, cmd, argp);
		nvme_put_ns_from_disk(head, srcu_idx);
	}

	return ret;
}

long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
@@ -414,10 +418,23 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
	struct nvme_ns_head *head =
		container_of(cdev, struct nvme_ns_head, cdev);
	void __user *argp = (void __user *)arg;
	struct nvme_ns *ns;
	int srcu_idx, ret;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (!ns) {
		srcu_read_unlock(&head->srcu, srcu_idx);
		return -EWOULDBLOCK;
	}

	if (is_ctrl_ioctl(cmd))
		return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
	return nvme_ns_head_ns_ioctl(head, cmd, argp);
		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);

	ret = nvme_ns_ioctl(ns, cmd, argp);
	nvme_put_ns_from_disk(head, srcu_idx);

	return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */

Loading