drivers/nvme/host/core.c (+14 −7)

@@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
	 * transitioning between power states.  Therefore, when running
	 * in any given state, we will enter the next lower-power
	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
	 * the requested maximum latency.
	 *
	 * We will not autonomously enter any non-operational state for
@@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
	 * lowest-power state, not the number of states.
	 */
	for (state = (int)ctrl->npss; state >= 0; state--) {
-		u64 total_latency_us, transition_ms;
+		u64 total_latency_us, exit_latency_us, transition_ms;
 
		if (target)
			table->entries[state] = target;
@@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
			      NVME_PS_FLAGS_NON_OP_STATE))
			continue;
 
-		total_latency_us =
-			(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-			+ le32_to_cpu(ctrl->psd[state].exit_lat);
-		if (total_latency_us > ctrl->ps_max_latency_us)
+		exit_latency_us =
+			(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+		if (exit_latency_us > ctrl->ps_max_latency_us)
			continue;
 
+		total_latency_us = exit_latency_us +
+			le32_to_cpu(ctrl->psd[state].entry_lat);
+
		/*
		 * This state is good.  Use it as the APST idle
		 * target for higher power states.
@@ -2438,6 +2441,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
	struct nvme_ns *ns;
 
	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		/*
		 * Revalidating a dead namespace sets capacity to 0. This will
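Taken together, the core.c hunks change which latency bounds the APST state choice: only a state's exit latency is now checked against ps_max_latency_us, while entry + exit latency still sizes the idle timer. Below is a minimal userspace sketch of that selection loop; it is not kernel code, and struct psd / pick_apst_target() are illustrative stand-ins for ctrl->psd[] and the loop in nvme_configure_apst().

#include <stdint.h>
#include <stdio.h>

struct psd {
	uint32_t entry_lat;	/* microseconds to enter the state */
	uint32_t exit_lat;	/* microseconds to wake back up */
	int non_operational;	/* only non-op states are APST targets */
};

/* Return the deepest usable state, or -1 if none qualifies. */
static int pick_apst_target(const struct psd *psd, int npss,
			    uint64_t max_latency_us)
{
	for (int state = npss; state >= 0; state--) {
		if (!psd[state].non_operational)
			continue;

		/* The gate is exit latency only: what a wakeup costs. */
		uint64_t exit_latency_us = psd[state].exit_lat;
		if (exit_latency_us > max_latency_us)
			continue;

		/* Entry latency still feeds the idle timeout:
		 * 50 * (enlat + exlat), converted to milliseconds. */
		uint64_t total_latency_us = exit_latency_us +
					    psd[state].entry_lat;
		uint64_t transition_ms = total_latency_us * 50 / 1000;

		printf("state %d: idle timeout %llu ms\n",
		       state, (unsigned long long)transition_ms);
		return state;
	}
	return -1;
}

With the new 100000 us default, a state with a 90 ms exit latency now qualifies even if entering it costs another 50 ms, whereas the old entry-plus-exit check would have rejected it.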
drivers/nvme/host/fc.c (+18 −2)

@@ -1139,6 +1139,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1265,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
	/*
	 * WARNING:
@@ -1294,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid, cqe.sqhd, cqe.command_id
+	 *
+	 * Failure or error of an individual i/o, in a transport
+	 * detected fashion unrelated to the nvme completion status,
+	 * potentially cause the initiator and target sides to get out
+	 * of sync on SQ head/tail (aka outstanding io count allowed).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
	 */
 
	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1359,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		goto done;
	}
 
+	terminate_assoc = false;
+
 done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1366,7 +1377,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
	}
 
	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
@@ -1379,6 +1390,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		nvme_end_request(rq, status, result);
	} else
		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
@@ -2791,6 +2806,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	ctrl->ctrl.opts = NULL;
	/* initiate nvme ctrl ref counting teardown */
	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_put_ctrl(&ctrl->ctrl);
 
	/* as we're past the point where we transition to the ref
	 * counting teardown path, if we return a bad pointer here,
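The fc.c change threads every completion path through one exit point: terminate_assoc starts out true and is cleared only when the CQE validates cleanly, so any early bail-out, including the recycled-AEN path that used to plain return, lands on check_error and tears the association down. A compilable sketch of that flag-and-goto shape follows; validate_completion() and recover() are invented stand-ins, not driver functions.

#include <stdbool.h>
#include <stdio.h>

static bool validate_completion(int rsp_len)
{
	return rsp_len > 0;	/* stand-in for the real CQE checks */
}

static void recover(const char *why)
{
	printf("error recovery: %s\n", why);
}

static void fcpio_done(int rsp_len, bool is_aen)
{
	bool terminate_assoc = true;	/* assume transport error by default */

	if (!validate_completion(rsp_len))
		goto done;		/* flag stays set on this path */

	terminate_assoc = false;	/* clean completion: keep association */

done:
	if (is_aen) {
		/* AEN ops are recycled, not completed; previously this
		 * path returned and skipped error handling entirely. */
		goto check_error;
	}

	/* normal request completion would run here */

check_error:
	if (terminate_assoc)
		recover("transport detected io error");
}

int main(void)
{
	fcpio_done(0, true);	/* bad completion: triggers recovery */
	fcpio_done(16, false);	/* good completion: no recovery */
	return 0;
}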
drivers/nvme/host/pci.c (+6 −7)

@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
		return false;
 
	/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
		goto out;
 
	/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
	result = nvme_pci_enable(dev);
	if (result)
		goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
 {
	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
	if (!queue_work(nvme_workq, &dev->reset_work))
		return -EBUSY;
	return 0;
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (result)
		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
	queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->reset_work);
	pci_set_drvdata(pdev, NULL);
 
	if (!pci_device_is_present(pdev)) {
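The pci.c hunks replace work_busy() polling with the controller state machine as the single reset gate: whoever wins the transition to NVME_CTRL_RESETTING owns the reset, and nvme_reset_work() now WARNs if entered in any other state. Here is a small C11 sketch of that admission pattern, using an atomic compare-and-swap where the driver uses nvme_change_ctrl_state(); the names are illustrative, not driver API.

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_NEW, CTRL_LIVE, CTRL_RESETTING, CTRL_DELETING };

static _Atomic enum ctrl_state state = CTRL_LIVE;

/* At most one caller can win LIVE -> RESETTING, mirroring the role
 * nvme_change_ctrl_state() plays in the patch. */
static bool enter_resetting(void)
{
	enum ctrl_state expected = CTRL_LIVE;

	return atomic_compare_exchange_strong(&state, &expected,
					      CTRL_RESETTING);
}

static int request_reset(void)
{
	if (!enter_resetting())
		return -EBUSY;	/* a reset already owns the state */
	/* queue_work(nvme_workq, &dev->reset_work) would go here; the
	 * work item later moves the state back to LIVE on success. */
	return 0;
}

int main(void)
{
	printf("first reset:  %d\n", request_reset());	/* 0 */
	printf("second reset: %d\n", request_reset());	/* -EBUSY, serialized */
	return 0;
}

This also explains the nvme_probe() hunk: the state must be moved to RESETTING before reset_work is queued, since the work item itself no longer performs that transition.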
drivers/nvme/host/rdma.c (+29 −15)

@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
	if (ret)
		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
	nvme_start_keep_alive(&ctrl->ctrl);
 
	if (ctrl->queue_count > 1) {
		ret = nvme_rdma_init_io_queues(ctrl);
		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
		ret = nvme_rdma_connect_io_queues(ctrl);
		if (ret)
-			goto stop_admin_q;
+			goto requeue;
	}
 
	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
	ctrl->ctrl.opts->nr_reconnects = 0;
 
	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
		nvme_queue_scan(&ctrl->ctrl);
		nvme_queue_async_events(&ctrl->ctrl);
	}
@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
			ctrl->ctrl.opts->nr_reconnects);
@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * queues are not a live anymore, so restart the queues to fail fast
+	 * new IO
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
	nvme_rdma_reconnect_or_remove(ctrl);
 }
@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
		struct request *rq)
 {
	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
		if (!blk_rq_is_passthrough(rq) ||
		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * reconnecting state means transport disruption, which
+			 * can take a long time and even might fail permanently,
+			 * so we can't let incoming I/O be requeued forever.
+			 * fail it fast to allow upper layers a chance to
+			 * failover.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return -EIO;
+			else
+				return -EAGAIN;
+		}
	}
 
-	return true;
+	return 0;
 }
@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		goto err;
 
	dev = queue->device->dev;
	ib_dma_sync_single_for_cpu(dev, sqe->dma,
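The rdma.c readiness check stops collapsing every not-ready case into "busy": it now returns an errno so the caller can distinguish "fail this I/O now" from "retry soon". A toy model of that decision follows; the types and helpers are invented, and the mapping of -EAGAIN back to BLK_MQ_RQ_QUEUE_BUSY presumably happens at the err: label in nvme_rdma_queue_rq(), which this diff does not show.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RECONNECTING, CTRL_DELETING };

struct queue {
	bool live;			/* NVME_RDMA_Q_LIVE in the driver */
	enum ctrl_state ctrl_state;
};

static int queue_is_ready(const struct queue *q, bool is_connect_cmd)
{
	if (!q->live && !is_connect_cmd) {
		/* A reconnect can take a long time or never finish, so
		 * don't requeue I/O forever: fail fast and let upper
		 * layers (e.g. multipath) try another path. */
		if (q->ctrl_state == CTRL_RECONNECTING)
			return -EIO;
		return -EAGAIN;	/* transient: safe to retry shortly */
	}
	return 0;	/* queue live, or this is the Connect command */
}

int main(void)
{
	struct queue q = { .live = false, .ctrl_state = CTRL_RECONNECTING };

	printf("during reconnect: %d\n", queue_is_ready(&q, false)); /* -EIO */
	q.ctrl_state = CTRL_LIVE;
	printf("while coming up:  %d\n", queue_is_ready(&q, false)); /* -EAGAIN */
	printf("connect command:  %d\n", queue_is_ready(&q, true));  /* 0 */
	return 0;
}

The same reasoning drives the reconnect/error-recovery hunks above: instead of stopping the admin queue across a reconnect, the recovery work restarts all queues so new I/O fails fast rather than sitting stopped for the duration of the outage.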