Commit 77571ba6 authored by Jens Axboe

Merge tag 'nvme-6.1-2022-09-20' of git://git.infradead.org/nvme into for-6.1/block

Pull NVMe updates from Christoph:

"nvme updates for Linux 6.1

 - handle number of queue changes in the TCP and RDMA drivers
   (Daniel Wagner)
 - allow changing the number of queues in nvmet (Daniel Wagner)
 - also consider host_iface when checking ip options (Daniel Wagner)
 - don't map pages which can't come from HIGHMEM (Fabio M. De Francesco)
 - avoid unnecessary flush bios in nvmet (Guixin Liu)
 - shrink and better pack the nvme_iod structure (Keith Busch)
 - add comment for unaligned "fake" nqn (Linjun Bao)
 - print actual source IP address through sysfs "address" attr
   (Martin Belanger)
 - various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang)"

* tag 'nvme-6.1-2022-09-20' of git://git.infradead.org/nvme:
  nvme-tcp: print actual source IP address through sysfs "address" attr
  nvmet-tcp: don't map pages which can't come from HIGHMEM
  nvme-pci: move iod dma_len fill gaps
  nvme-pci: iod npages fits in s8
  nvme-pci: iod's 'aborted' is a bool
  nvme-pci: remove nvme_queue from nvme_iod
  nvme: consider also host_iface when checking ip options
  nvme-rdma: handle number of queue changes
  nvme-tcp: handle number of queue changes
  nvmet: expose max queues to configfs
  nvmet: avoid unnecessary flush bio
  nvmet-auth: remove redundant parameters req
  nvmet-auth: clean up with done_kfree
  nvme-auth: remove the redundant req->cqe->result.u16 assignment operation
  nvme: move from strlcpy with unused retval to strscpy
  nvme: add comment for unaligned "fake" nqn
parents d2d05b88 02c57a82
drivers/nvme/host/core.c  +6 −2
@@ -2696,7 +2696,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
	if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) {
		nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
		if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
-			strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+			strscpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
			return;
		}

@@ -2704,7 +2704,11 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
			dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
	}

-	/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+	/*
+	 * Generate a "fake" NQN similar to the one in Section 4.5 of the NVMe
+	 * Base Specification 2.0.  It is slightly different from the format
+	 * specified there due to historic reasons, and we can't change it now.
+	 */
	off = snprintf(subsys->subnqn, NVMF_NQN_SIZE,
			"nqn.2014.08.org.nvmexpress:%04x%04x",
			le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
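
A note on the strlcpy() to strscpy() conversions in this series: strscpy() never reads the source past the destination size and returns the number of characters copied, or -E2BIG on truncation, whereas strlcpy() returns strlen(src) and forces callers to compare against the buffer size. A self-contained userspace model of the semantics (the helper is illustrative; the real strscpy() lives in lib/string.c):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative model of strscpy(): bounded copy, error on truncation. */
    static long sketch_strscpy(char *dst, const char *src, size_t size)
    {
            size_t i;

            if (size == 0)
                    return -E2BIG;
            for (i = 0; i < size - 1 && src[i]; i++)
                    dst[i] = src[i];
            dst[i] = '\0';
            return src[i] ? -E2BIG : (long)i;   /* -E2BIG if truncated */
    }

    int main(void)
    {
            char nqn[8];

            if (sketch_strscpy(nqn, "nqn.2014-08.org.example", sizeof(nqn)) < 0)
                    printf("truncated, kept \"%s\"\n", nqn);
            return 0;
    }
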
drivers/nvme/host/fabrics.c  +19 −6
@@ -49,7 +49,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
		goto out_unlock;

	kref_init(&host->ref);
-	strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+	strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);

	list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -971,13 +971,17 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
		return false;

	/*
-	 * Checking the local address is rough. In most cases, none is specified
-	 * and the host port is selected by the stack.
+	 * Checking the local address or host interfaces is rough.
+	 *
+	 * In most cases, none is specified and the host port or
+	 * host interface is selected by the stack.
	 *
	 * Assume no match if:
-	 * -  local address is specified and address is not the same
-	 * -  local address is not specified but remote is, or vice versa
-	 *    (admin using specific host_traddr when it matters).
+	 * -  local address or host interface is specified and address
+	 *    or host interface is not the same
+	 * -  local address or host interface is not specified but
+	 *    remote is, or vice versa (admin using specific
+	 *    host_traddr/host_iface when it matters).
	 */
	if ((opts->mask & NVMF_OPT_HOST_TRADDR) &&
	    (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
@@ -988,6 +992,15 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
		return false;
	}

+	if ((opts->mask & NVMF_OPT_HOST_IFACE) &&
+	    (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) {
+		if (strcmp(opts->host_iface, ctrl->opts->host_iface))
+			return false;
+	} else if ((opts->mask & NVMF_OPT_HOST_IFACE) ||
+		   (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) {
+		return false;
+	}
+
	return true;
}
EXPORT_SYMBOL_GPL(nvmf_ip_options_match);
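
The new NVMF_OPT_HOST_IFACE branch mirrors the host_traddr check directly above it: a connection attempt matches an existing controller only if both sides specify the same host_iface, or neither specifies one. A hypothetical helper capturing that symmetric rule (illustration only, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Symmetric option match, as nvmf_ip_options_match() now applies to
     * both host_traddr and host_iface.
     */
    static bool opt_matches(bool a_set, const char *a, bool b_set, const char *b)
    {
            if (a_set && b_set)
                    return strcmp(a, b) == 0;   /* both set: values must agree */
            return !a_set && !b_set;            /* one-sided spec: no match */
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   opt_matches(true, "eth0", true, "eth0"),  /* 1: match */
                   opt_matches(true, "eth0", true, "eth1"),  /* 0: differ */
                   opt_matches(true, "eth0", false, ""));    /* 0: one-sided */
            return 0;
    }
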
drivers/nvme/host/pci.c  +33 −32
@@ -226,12 +226,12 @@ struct nvme_queue {
struct nvme_iod {
	struct nvme_request req;
	struct nvme_command cmd;
-	struct nvme_queue *nvmeq;
	bool use_sgl;
-	int aborted;
-	int npages;		/* In the PRP list. 0 means small pool in use */
-	dma_addr_t first_dma;
+	bool aborted;
+	s8 nr_allocations;	/* PRP list pool allocations. 0 means small
+				   pool in use */
	unsigned int dma_len;	/* length of single DMA segment mapping */
+	dma_addr_t first_dma;
	dma_addr_t meta_dma;
	struct sg_table sgt;
};
@@ -430,11 +430,6 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set,
{
	struct nvme_dev *dev = set->driver_data;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
-	struct nvme_queue *nvmeq = &dev->queues[queue_idx];
-
-	BUG_ON(!nvmeq);
-	iod->nvmeq = nvmeq;

	nvme_req(req)->ctrl = &dev->ctrl;
	nvme_req(req)->cmd = &iod->cmd;
@@ -526,7 +521,7 @@ static void **nvme_pci_iod_list(struct request *req)

static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
	int nseg = blk_rq_nr_phys_segments(req);
	unsigned int avg_seg_size;

@@ -534,7 +529,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)

	if (!nvme_ctrl_sgl_supported(&dev->ctrl))
		return false;
-	if (!iod->nvmeq->qid)
+	if (!nvmeq->qid)
		return false;
	if (!sgl_threshold || avg_seg_size < sgl_threshold)
		return false;
@@ -548,7 +543,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
	dma_addr_t dma_addr = iod->first_dma;
	int i;

-	for (i = 0; i < iod->npages; i++) {
+	for (i = 0; i < iod->nr_allocations; i++) {
		__le64 *prp_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);

@@ -564,7 +559,7 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
	dma_addr_t dma_addr = iod->first_dma;
	int i;

-	for (i = 0; i < iod->npages; i++) {
+	for (i = 0; i < iod->nr_allocations; i++) {
		struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);

@@ -587,7 +582,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)

	dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);

-	if (iod->npages == 0)
+	if (iod->nr_allocations == 0)
		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
			      iod->first_dma);
	else if (iod->use_sgl)
@@ -649,15 +644,15 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
	if (nprps <= (256 / 8)) {
		pool = dev->prp_small_pool;
-		iod->npages = 0;
+		iod->nr_allocations = 0;
	} else {
		pool = dev->prp_page_pool;
-		iod->npages = 1;
+		iod->nr_allocations = 1;
	}

	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
	if (!prp_list) {
-		iod->npages = -1;
+		iod->nr_allocations = -1;
		return BLK_STS_RESOURCE;
	}
	list[0] = prp_list;
@@ -669,7 +664,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
			if (!prp_list)
				goto free_prps;
-			list[iod->npages++] = prp_list;
+			list[iod->nr_allocations++] = prp_list;
			prp_list[0] = old_prp_list[i - 1];
			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
			i = 1;
@@ -744,15 +739,15 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,

	if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
		pool = dev->prp_small_pool;
-		iod->npages = 0;
+		iod->nr_allocations = 0;
	} else {
		pool = dev->prp_page_pool;
-		iod->npages = 1;
+		iod->nr_allocations = 1;
	}

	sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
	if (!sg_list) {
-		iod->npages = -1;
+		iod->nr_allocations = -1;
		return BLK_STS_RESOURCE;
	}

@@ -771,7 +766,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
				goto free_sgls;

			i = 0;
-			nvme_pci_iod_list(req)[iod->npages++] = sg_list;
+			nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list;
			sg_list[i++] = *link;
			nvme_pci_sgl_set_seg(link, sgl_dma, entries);
		}
@@ -831,6 +826,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
	int rc;

	if (blk_rq_nr_phys_segments(req) == 1) {
+		struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
		struct bio_vec bv = req_bvec(req);

		if (!is_pci_p2pdma_page(bv.bv_page)) {
@@ -838,7 +834,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
				return nvme_setup_prp_simple(dev, req,
							     &cmnd->rw, &bv);

-			if (iod->nvmeq->qid && sgl_threshold &&
+			if (nvmeq->qid && sgl_threshold &&
			    nvme_ctrl_sgl_supported(&dev->ctrl))
				return nvme_setup_sgl_simple(dev, req,
							     &cmnd->rw, &bv);
@@ -896,8 +892,8 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	blk_status_t ret;

-	iod->aborted = 0;
-	iod->npages = -1;
+	iod->aborted = false;
+	iod->nr_allocations = -1;
	iod->sgt.nents = 0;

	ret = nvme_setup_cmd(req->q->queuedata, req);
@@ -1017,12 +1013,16 @@ static void nvme_queue_rqs(struct request **rqlist)

static __always_inline void nvme_pci_unmap_rq(struct request *req)
{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_dev *dev = iod->nvmeq->dev;
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
+
+	if (blk_integrity_rq(req)) {
+	        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

-	if (blk_integrity_rq(req))
		dma_unmap_page(dev->dev, iod->meta_dma,
			       rq_integrity_vec(req)->bv_len, rq_data_dir(req));
+	}

	if (blk_rq_nr_phys_segments(req))
		nvme_unmap_data(dev, req);
}
@@ -1270,8 +1270,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)

static void abort_endio(struct request *req, blk_status_t error)
{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = iod->nvmeq;
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;

	dev_warn(nvmeq->dev->ctrl.device,
		 "Abort status: 0x%x", nvme_req(req)->status);
@@ -1333,7 +1332,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
static enum blk_eh_timer_return nvme_timeout(struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = iod->nvmeq;
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
	struct nvme_dev *dev = nvmeq->dev;
	struct request *abort_req;
	struct nvme_command cmd = { };
@@ -1414,7 +1413,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
		atomic_inc(&dev->ctrl.abort_limit);
		return BLK_EH_RESET_TIMER;
	}
-	iod->aborted = 1;
+	iod->aborted = true;

	cmd.abort.opcode = nvme_admin_abort_cmd;
	cmd.abort.cid = nvme_cid(req);
@@ -3561,6 +3560,8 @@ static int __init nvme_init(void)
	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
+	BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) >
+		     S8_MAX);

	return pci_register_driver(&nvme_driver);
}
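
Two details in the pci.c changes are easy to miss. First, nvme_iod no longer caches a struct nvme_queue pointer: every user now recovers it from req->mq_hctx->driver_data, which blk-mq fills in when the hardware context is initialized, so the per-request PDU shrinks. Second, narrowing nr_allocations to s8 is only safe because the worst-case number of PRP-list pages fits, which the new BUILD_BUG_ON in nvme_init() enforces at compile time. A standalone check of the same shape (the transfer ceiling and macro names here are illustrative, not the driver's):

    #include <limits.h>
    #include <stdio.h>

    #define MAX_TRANSFER_BYTES  (4 * 1024 * 1024)   /* illustrative ceiling */
    #define CTRL_PAGE_SIZE      4096
    #define PRPS_PER_PAGE       (CTRL_PAGE_SIZE / 8) /* 8-byte PRP entries */
    #define WORST_CASE_PRP_PAGES \
            ((MAX_TRANSFER_BYTES / CTRL_PAGE_SIZE + PRPS_PER_PAGE - 1) / \
             PRPS_PER_PAGE)

    /* Fails the build, like BUILD_BUG_ON, if the count can't live in an s8. */
    _Static_assert(WORST_CASE_PRP_PAGES <= SCHAR_MAX,
                   "worst-case PRP-list page count does not fit in s8");

    int main(void)
    {
            printf("worst-case PRP-list pages: %d\n", WORST_CASE_PRP_PAGES);
            return 0;
    }
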
drivers/nvme/host/rdma.c  +21 −5
@@ -696,11 +696,12 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
	return ret;
}

-static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
+static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
+				     int first, int last)
{
	int i, ret = 0;

-	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
+	for (i = first; i < last; i++) {
		ret = nvme_rdma_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
@@ -709,7 +710,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
	return 0;

out_stop_queues:
-	for (i--; i >= 1; i--)
+	for (i--; i >= first; i--)
		nvme_rdma_stop_queue(&ctrl->queues[i]);
	return ret;
}
@@ -964,7 +965,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,

static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
{
-	int ret;
+	int ret, nr_queues;

	ret = nvme_rdma_alloc_io_queues(ctrl);
	if (ret)
@@ -980,7 +981,13 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
			goto out_free_tag_set;
	}

-	ret = nvme_rdma_start_io_queues(ctrl);
+	/*
+	 * Only start IO queues for which we have allocated the tagset
+	 * and limited it to the available queues. On reconnects, the
+	 * number of queues might have changed.
+	 */
+	nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count);
+	ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues);
	if (ret)
		goto out_cleanup_connect_q;

@@ -1000,6 +1007,15 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
		nvme_unfreeze(&ctrl->ctrl);
	}

+	/*
+	 * If the number of queues has increased (reconnect case)
+	 * start all new queues now.
+	 */
+	ret = nvme_rdma_start_io_queues(ctrl, nr_queues,
+					ctrl->tag_set.nr_hw_queues + 1);
+	if (ret)
+		goto out_wait_freeze_timed_out;
+
	return 0;

out_wait_freeze_timed_out:
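
The reconnect flow above (and its TCP twin below) starts queues in two phases because the target may grant a different number of queues than on the previous connection: phase one starts only the queues the existing tagset can dispatch to, and phase two, after blk_mq_update_nr_hw_queues() has grown the tagset, starts any newly granted ones. A runnable sketch of the arithmetic, with plain ints standing in for controller state (values illustrative, not driver code):

    #include <stdio.h>

    static int min_int(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
            int tagset_hw_queues = 4;   /* I/O queues in the old tagset */
            int granted_io_queues = 8;  /* queues granted on reconnect  */
            int queue_count = granted_io_queues + 1;  /* + admin queue  */

            /* Phase 1: only queues the current tagset covers. */
            int nr_queues = min_int(tagset_hw_queues + 1, queue_count);
            printf("phase 1: start queues 1..%d\n", nr_queues - 1);

            /* ...blk_mq_update_nr_hw_queues() grows the tagset to 8... */
            tagset_hw_queues = granted_io_queues;

            /* Phase 2: the newly granted queues. */
            printf("phase 2: start queues %d..%d\n", nr_queues,
                   tagset_hw_queues);
            return 0;
    }
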
drivers/nvme/host/tcp.c  +41 −6
@@ -1762,11 +1762,12 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
		nvme_tcp_stop_queue(ctrl, i);
}

-static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
+static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
+				    int first, int last)
{
	int i, ret;

-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = first; i < last; i++) {
		ret = nvme_tcp_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
@@ -1775,7 +1776,7 @@ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
	return 0;

out_stop_queues:
-	for (i--; i >= 1; i--)
+	for (i--; i >= first; i--)
		nvme_tcp_stop_queue(ctrl, i);
	return ret;
}
@@ -1901,7 +1902,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)

static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
{
-	int ret;
+	int ret, nr_queues;

	ret = nvme_tcp_alloc_io_queues(ctrl);
	if (ret)
@@ -1917,7 +1918,13 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
			goto out_free_tag_set;
	}

-	ret = nvme_tcp_start_io_queues(ctrl);
+	/*
+	 * Only start IO queues for which we have allocated the tagset
+	 * and limited it to the available queues. On reconnects, the
+	 * number of queues might have changed.
+	 */
+	nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count);
+	ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues);
	if (ret)
		goto out_cleanup_connect_q;

@@ -1937,6 +1944,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
		nvme_unfreeze(ctrl);
	}

+	/*
+	 * If the number of queues has increased (reconnect case)
+	 * start all new queues now.
+	 */
+	ret = nvme_tcp_start_io_queues(ctrl, nr_queues,
+				       ctrl->tagset->nr_hw_queues + 1);
+	if (ret)
+		goto out_wait_freeze_timed_out;
+
	return 0;

out_wait_freeze_timed_out:
@@ -2530,6 +2546,25 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
	return queue->nr_cqe;
}

+static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
+{
+	struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0];
+	struct sockaddr_storage src_addr;
+	int ret, len;
+
+	len = nvmf_get_address(ctrl, buf, size);
+
+	ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
+	if (ret > 0) {
+		if (len > 0)
+			len--; /* strip trailing newline */
+		len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
+				(len) ? "," : "", &src_addr);
+	}
+
+	return len;
+}
+
static const struct blk_mq_ops nvme_tcp_mq_ops = {
	.queue_rq	= nvme_tcp_queue_rq,
	.commit_rqs	= nvme_tcp_commit_rqs,
@@ -2561,7 +2596,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
	.free_ctrl		= nvme_tcp_free_ctrl,
	.submit_async_event	= nvme_tcp_submit_async_event,
	.delete_ctrl		= nvme_tcp_delete_ctrl,
-	.get_address		= nvmf_get_address,
+	.get_address		= nvme_tcp_get_address,
	.stop_ctrl		= nvme_tcp_stop_ctrl,
};
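
nvme_tcp_get_address() extends the generic nvmf_get_address() output rather than replacing it: it strips the trailing newline, then appends a src_addr= field carrying the socket's actual source address from kernel_getsockname(). A userspace rendition of the string assembly (addresses made up; snprintf stands in for the kernel's scnprintf):

    #include <stdio.h>

    int main(void)
    {
            char buf[128];
            int len = snprintf(buf, sizeof(buf),
                               "traddr=192.168.1.10,trsvcid=4420\n");

            /* Strip the trailing newline so src_addr joins the same line. */
            if (len > 0)
                    len--;
            len += snprintf(buf + len, sizeof(buf) - len, "%ssrc_addr=%s\n",
                            len ? "," : "", "192.168.1.2");
            fputs(buf, stdout); /* traddr=...,trsvcid=...,src_addr=... */
            return 0;
    }
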
