drivers/nvme/host/core.c (+14 −7)

@@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
	 * transitioning between power states.  Therefore, when running
	 * in any given state, we will enter the next lower-power
	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
	 * the requested maximum latency.
	 *
	 * We will not autonomously enter any non-operational state for
@@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
	 * lowest-power state, not the number of states.
	 */
	for (state = (int)ctrl->npss; state >= 0; state--) {
-		u64 total_latency_us, transition_ms;
+		u64 total_latency_us, exit_latency_us, transition_ms;
 
		if (target)
			table->entries[state] = target;
@@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
			      NVME_PS_FLAGS_NON_OP_STATE))
			continue;
 
-		total_latency_us =
-			(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-			+ le32_to_cpu(ctrl->psd[state].exit_lat);
-		if (total_latency_us > ctrl->ps_max_latency_us)
+		exit_latency_us =
+			(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+		if (exit_latency_us > ctrl->ps_max_latency_us)
			continue;
 
+		total_latency_us = exit_latency_us +
+			le32_to_cpu(ctrl->psd[state].entry_lat);
+
		/*
		 * This state is good.  Use it as the APST idle
		 * target for higher power states.
@@ -2438,6 +2441,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
	struct nvme_ns *ns;
 
	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		/*
		 * Revalidating a dead namespace sets capacity to 0. This will
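Taken together, the core.c hunks change which latency bounds the APST state choice: only a state's exit latency is now checked against ps_max_latency_us, while entry + exit latency still sizes the idle timer. Below is a minimal userspace sketch of that selection loop; it is not kernel code, and struct psd / pick_apst_target() are illustrative stand-ins for ctrl->psd[] and the loop in nvme_configure_apst().

#include <stdint.h>
#include <stdio.h>

struct psd {
	uint32_t entry_lat;	/* microseconds to enter the state */
	uint32_t exit_lat;	/* microseconds to wake back up */
	int non_operational;	/* only non-op states are APST targets */
};

/* Return the deepest usable state, or -1 if none qualifies. */
static int pick_apst_target(const struct psd *psd, int npss,
			    uint64_t max_latency_us)
{
	for (int state = npss; state >= 0; state--) {
		if (!psd[state].non_operational)
			continue;

		/* The gate is exit latency only: what a wakeup costs. */
		uint64_t exit_latency_us = psd[state].exit_lat;
		if (exit_latency_us > max_latency_us)
			continue;

		/* Entry latency still feeds the idle timeout:
		 * 50 * (enlat + exlat), converted to milliseconds. */
		uint64_t total_latency_us = exit_latency_us +
					    psd[state].entry_lat;
		uint64_t transition_ms = total_latency_us * 50 / 1000;

		printf("state %d: idle timeout %llu ms\n",
		       state, (unsigned long long)transition_ms);
		return state;
	}
	return -1;
}

With the new 100000 us default, a state with a 90 ms exit latency now qualifies even if entering it costs another 50 ms, whereas the old entry-plus-exit check would have rejected it.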
drivers/nvme/host/fc.c (+18 −2)

@@ -1139,6 +1139,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1265,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
	/*
	 * WARNING:
@@ -1294,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid, cqe.sqhd, cqe.command_id
+	 *
+	 * Failure or error of an individual i/o, in a transport
+	 * detected fashion unrelated to the nvme completion status,
+	 * potentially cause the initiator and target sides to get out
+	 * of sync on SQ head/tail (aka outstanding io count allowed).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
	 */
 
	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1359,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		goto done;
	}
 
+	terminate_assoc = false;
+
 done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1366,7 +1377,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
	}
 
	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
@@ -1379,6 +1390,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
		nvme_end_request(rq, status, result);
	} else
		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
@@ -2791,6 +2806,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	ctrl->ctrl.opts = NULL;
	/* initiate nvme ctrl ref counting teardown */
	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_put_ctrl(&ctrl->ctrl);
 
	/* as we're past the point where we transition to the ref
	 * counting teardown path, if we return a bad pointer here,
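The fc.c change threads every completion path through one exit point: terminate_assoc starts out true and is cleared only when the CQE validates cleanly, so any early bail-out, including the recycled-AEN path that used to plain return, lands on check_error and tears the association down. A compilable sketch of that flag-and-goto shape follows; validate_completion() and recover() are invented stand-ins, not driver functions.

#include <stdbool.h>
#include <stdio.h>

static bool validate_completion(int rsp_len)
{
	return rsp_len > 0;	/* stand-in for the real CQE checks */
}

static void recover(const char *why)
{
	printf("error recovery: %s\n", why);
}

static void fcpio_done(int rsp_len, bool is_aen)
{
	bool terminate_assoc = true;	/* assume transport error by default */

	if (!validate_completion(rsp_len))
		goto done;		/* flag stays set on this path */

	terminate_assoc = false;	/* clean completion: keep association */

done:
	if (is_aen) {
		/* AEN ops are recycled, not completed; previously this
		 * path returned and skipped error handling entirely. */
		goto check_error;
	}

	/* normal request completion would run here */

check_error:
	if (terminate_assoc)
		recover("transport detected io error");
}

int main(void)
{
	fcpio_done(0, true);	/* bad completion: triggers recovery */
	fcpio_done(16, false);	/* good completion: no recovery */
	return 0;
}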
drivers/nvme/host/pci.c (+6 −7)

@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
		return false;
 
	/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
		goto out;
 
	/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
	result = nvme_pci_enable(dev);
	if (result)
		goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
 {
	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
	if (!queue_work(nvme_workq, &dev->reset_work))
		return -EBUSY;
	return 0;
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (result)
		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
	queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->reset_work);
	pci_set_drvdata(pdev, NULL);
 
	if (!pci_device_is_present(pdev)) {
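The pci.c hunks replace work_busy() polling with the controller state machine as the single reset gate: whoever wins the transition to NVME_CTRL_RESETTING owns the reset, and nvme_reset_work() now WARNs if entered in any other state. Here is a small C11 sketch of that admission pattern, using an atomic compare-and-swap where the driver uses nvme_change_ctrl_state(); the names are illustrative, not driver API.

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_NEW, CTRL_LIVE, CTRL_RESETTING, CTRL_DELETING };

static _Atomic enum ctrl_state state = CTRL_LIVE;

/* At most one caller can win LIVE -> RESETTING, mirroring the role
 * nvme_change_ctrl_state() plays in the patch. */
static bool enter_resetting(void)
{
	enum ctrl_state expected = CTRL_LIVE;

	return atomic_compare_exchange_strong(&state, &expected,
					      CTRL_RESETTING);
}

static int request_reset(void)
{
	if (!enter_resetting())
		return -EBUSY;	/* a reset already owns the state */
	/* queue_work(nvme_workq, &dev->reset_work) would go here; the
	 * work item later moves the state back to LIVE on success. */
	return 0;
}

int main(void)
{
	printf("first reset:  %d\n", request_reset());	/* 0 */
	printf("second reset: %d\n", request_reset());	/* -EBUSY, serialized */
	return 0;
}

This also explains the nvme_probe() hunk: the state must be moved to RESETTING before reset_work is queued, since the work item itself no longer performs that transition.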
drivers/nvme/host/rdma.c (+29 −15)

@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
	if (ret)
		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
	nvme_start_keep_alive(&ctrl->ctrl);
 
	if (ctrl->queue_count > 1) {
		ret = nvme_rdma_init_io_queues(ctrl);
		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
		ret = nvme_rdma_connect_io_queues(ctrl);
		if (ret)
-			goto stop_admin_q;
+			goto requeue;
	}
 
	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
	ctrl->ctrl.opts->nr_reconnects = 0;
 
	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
		nvme_queue_scan(&ctrl->ctrl);
		nvme_queue_async_events(&ctrl->ctrl);
	}
@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
			ctrl->ctrl.opts->nr_reconnects);
@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * queues are not a live anymore, so restart the queues to fail fast
+	 * new IO
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
	nvme_rdma_reconnect_or_remove(ctrl);
 }
@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
		struct request *rq)
 {
	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
		if (!blk_rq_is_passthrough(rq) ||
		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * reconnecting state means transport disruption, which
+			 * can take a long time and even might fail permanently,
+			 * so we can't let incoming I/O be requeued forever.
+			 * fail it fast to allow upper layers a chance to
+			 * failover.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return -EIO;
+			else
+				return -EAGAIN;
+		}
	}
 
-	return true;
+	return 0;
 }
@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		goto err;
 
	dev = queue->device->dev;
	ib_dma_sync_single_for_cpu(dev, sqe->dma,
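The rdma.c readiness check stops collapsing every not-ready case into "busy": it now returns an errno so the caller can distinguish "fail this I/O now" from "retry soon". A toy model of that decision follows; the types and helpers are invented, and the mapping of -EAGAIN back to BLK_MQ_RQ_QUEUE_BUSY presumably happens at the err: label in nvme_rdma_queue_rq(), which this diff does not show.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RECONNECTING, CTRL_DELETING };

struct queue {
	bool live;			/* NVME_RDMA_Q_LIVE in the driver */
	enum ctrl_state ctrl_state;
};

static int queue_is_ready(const struct queue *q, bool is_connect_cmd)
{
	if (!q->live && !is_connect_cmd) {
		/* A reconnect can take a long time or never finish, so
		 * don't requeue I/O forever: fail fast and let upper
		 * layers (e.g. multipath) try another path. */
		if (q->ctrl_state == CTRL_RECONNECTING)
			return -EIO;
		return -EAGAIN;	/* transient: safe to retry shortly */
	}
	return 0;	/* queue live, or this is the Connect command */
}

int main(void)
{
	struct queue q = { .live = false, .ctrl_state = CTRL_RECONNECTING };

	printf("during reconnect: %d\n", queue_is_ready(&q, false)); /* -EIO */
	q.ctrl_state = CTRL_LIVE;
	printf("while coming up:  %d\n", queue_is_ready(&q, false)); /* -EAGAIN */
	printf("connect command:  %d\n", queue_is_ready(&q, true));  /* 0 */
	return 0;
}

The same reasoning drives the reconnect/error-recovery hunks above: instead of stopping the admin queue across a reconnect, the recovery work restarts all queues so new I/O fails fast rather than sitting stopped for the duration of the outage.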