Commit d38b4d28 authored by Jens Axboe
Browse files

Merge tag 'nvme-5.12-20210319' of git://git.infradead.org/nvme into block-5.12

Pull NVMe updates from Christoph:

"nvme fixes for 5.12

 - fix tag allocation for keep alive
 - fix a unit mismatch for the Write Zeroes limits
 - various TCP transport fixes (Sagi Grimberg, Elad Grupi)
 - fix iosqes and iocqes validation for discovery controllers (Sagi Grimberg)"

* tag 'nvme-5.12-20210319' of git://git.infradead.org/nvme:
  nvmet-tcp: fix kmap leak when data digest in use
  nvmet: don't check iosqes,iocqes for discovery controllers
  nvme-rdma: fix possible hang when failing to set io queues
  nvme-tcp: fix possible hang when failing to set io queues
  nvme-tcp: fix misuse of __smp_processor_id with preemption enabled
  nvme-tcp: fix a NULL deref when receiving a 0-length r2t PDU
  nvme: fix Write Zeroes limitations
  nvme: allocate the keep alive request using BLK_MQ_REQ_NOWAIT
  nvme: merge nvme_keep_alive into nvme_keep_alive_work
  nvme-fabrics: only reserve a single tag
parents 1e28eed1 bac04454
Loading
Loading
Loading
Loading
+21 −43
Original line number Diff line number Diff line
@@ -1226,28 +1226,12 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
		queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static int nvme_keep_alive(struct nvme_ctrl *ctrl)
{
	struct request *rq;

	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
			BLK_MQ_REQ_RESERVED);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->timeout = ctrl->kato * HZ;
	rq->end_io_data = ctrl;

	blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);

	return 0;
}

static void nvme_keep_alive_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvme_ctrl, ka_work);
	bool comp_seen = ctrl->comp_seen;
	struct request *rq;

	if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
		dev_dbg(ctrl->device,
@@ -1257,12 +1241,18 @@ static void nvme_keep_alive_work(struct work_struct *work)
		return;
	}

	if (nvme_keep_alive(ctrl)) {
	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
				BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq)) {
		/* allocation failure, reset the controller */
		dev_err(ctrl->device, "keep-alive failed\n");
		dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
		nvme_reset_ctrl(ctrl);
		return;
	}

	rq->timeout = ctrl->kato * HZ;
	rq->end_io_data = ctrl;
	blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
}

static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1964,30 +1954,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
}

static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
{
	u64 max_blocks;

	if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
	    (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
		return;
/*
	 * Even though NVMe spec explicitly states that MDTS is not
	 * applicable to the write-zeroes:- "The restriction does not apply to
	 * commands that do not transfer data between the host and the
	 * controller (e.g., Write Uncorrectable or Write Zeroes command).".
	 * In order to be more cautious use controller's max_hw_sectors value
	 * to configure the maximum sectors for the write-zeroes which is
	 * configured based on the controller's MDTS field in the
	 * nvme_init_identify() if available.
 * Even though NVMe spec explicitly states that MDTS is not applicable to the
 * write-zeroes, we are cautious and limit the size to the controller's
 * max_hw_sectors value, which is based on the MDTS field and possibly other
 * limiting factors.
 */
	if (ns->ctrl->max_hw_sectors == UINT_MAX)
		max_blocks = (u64)USHRT_MAX + 1;
	else
		max_blocks = ns->ctrl->max_hw_sectors + 1;

	blk_queue_max_write_zeroes_sectors(disk->queue,
					   nvme_lba_to_sect(ns, max_blocks));
static void nvme_config_write_zeroes(struct request_queue *q,
		struct nvme_ctrl *ctrl)
{
	if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
	    !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
		blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors);
}

static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -2159,7 +2137,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
	set_capacity_and_notify(disk, capacity);

	nvme_config_discard(disk, ns);
	nvme_config_write_zeroes(disk, ns);
	nvme_config_write_zeroes(disk->queue, ns->ctrl);

	set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
		test_bit(NVME_NS_FORCE_RO, &ns->flags));
+7 −0
Original line number Diff line number Diff line
@@ -18,6 +18,13 @@
/* default is -1: the fail fast mechanism is disabled  */
#define NVMF_DEF_FAIL_FAST_TMO		-1

/*
 * Reserve one command for internal usage.  This command is used for sending
 * the connect command, as well as for the keep alive command on the admin
 * queue once live.
 */
#define NVMF_RESERVED_TAGS	1

/*
 * Define a host as seen by the target.  We allocate one at boot, but also
 * allow overriding it when creating controllers.  This is both to provide
+2 −2
Original line number Diff line number Diff line
@@ -2863,7 +2863,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
	ctrl->tag_set.ops = &nvme_fc_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
	ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	ctrl->tag_set.cmd_size =
@@ -3485,7 +3485,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
	ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
	ctrl->admin_tag_set.cmd_size =
		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
+7 −4
Original line number Diff line number Diff line
@@ -736,8 +736,11 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
		return ret;

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	if (ctrl->ctrl.queue_count < 2)
		return 0;
	if (ctrl->ctrl.queue_count < 2) {
		dev_err(ctrl->ctrl.device,
			"unable to set any I/O queues\n");
		return -ENOMEM;
	}

	dev_info(ctrl->ctrl.device,
		"creating %d I/O queues.\n", nr_io_queues);
@@ -798,7 +801,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_rdma_admin_mq_ops;
		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
		set->reserved_tags = NVMF_RESERVED_TAGS;
		set->numa_node = nctrl->numa_node;
		set->cmd_size = sizeof(struct nvme_rdma_request) +
				NVME_RDMA_DATA_SGL_SIZE;
@@ -811,7 +814,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_rdma_mq_ops;
		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
		set->reserved_tags = NVMF_RESERVED_TAGS;
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_SHOULD_MERGE;
		set->cmd_size = sizeof(struct nvme_rdma_request) +
+15 −5
Original line number Diff line number Diff line
@@ -287,7 +287,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
	 * directly, otherwise queue io_work. Also, only do that if we
	 * are on the same cpu, so we don't introduce contention.
	 */
	if (queue->io_cpu == __smp_processor_id() &&
	if (queue->io_cpu == raw_smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		queue->more_requests = !last;
		nvme_tcp_send_all(queue);
@@ -568,6 +568,13 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
	req->pdu_len = le32_to_cpu(pdu->r2t_length);
	req->pdu_sent = 0;

	if (unlikely(!req->pdu_len)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len is %u, probably a bug...\n",
			rq->tag, req->pdu_len);
		return -EPROTO;
	}

	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
@@ -1575,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_admin_mq_ops;
		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
		set->reserved_tags = NVMF_RESERVED_TAGS;
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1587,7 +1594,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_mq_ops;
		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
		set->reserved_tags = NVMF_RESERVED_TAGS;
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1745,8 +1752,11 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
		return ret;

	ctrl->queue_count = nr_io_queues + 1;
	if (ctrl->queue_count < 2)
		return 0;
	if (ctrl->queue_count < 2) {
		dev_err(ctrl->device,
			"unable to set any I/O queues\n");
		return -ENOMEM;
	}

	dev_info(ctrl->device,
		"creating %d I/O queues.\n", nr_io_queues);
Loading