Commit aaeadd70, authored by Sagi Grimberg, committed by Christoph Hellwig
Browse files

nvmet: fix false keep-alive timeout when a controller is torn down



Controller teardown flow may take some time in case it has many I/O
queues, and the host may not send us keep-alive during this period.
Hence reset the traffic based keep-alive timer so we don't trigger
a controller teardown as a result of a keep-alive expiration.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 25df1acd
Loading
Loading
Loading
Loading
+11 −4
Original line number Diff line number Diff line
@@ -388,10 +388,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;
	bool reset_tbkas = ctrl->reset_tbkas;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
	ctrl->reset_tbkas = false;
	if (reset_tbkas) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -804,6 +804,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
	percpu_ref_exit(&sq->ref);

	if (ctrl) {
		/*
		 * The teardown flow may take some time, and the host may not
		 * send us keep-alive during this period, hence reset the
		 * traffic based keep-alive timer so we don't trigger a
		 * controller teardown as a result of a keep-alive expiration.
		 */
		ctrl->reset_tbkas = true;
		nvmet_ctrl_put(ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
@@ -952,7 +959,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;
		sq->ctrl->reset_tbkas = true;

	return true;

+1 −1
Original line number Diff line number Diff line
@@ -167,7 +167,7 @@ struct nvmet_ctrl {
	struct nvmet_subsys	*subsys;
	struct nvmet_sq		**sqs;

	bool			cmd_seen;
	bool			reset_tbkas;

	struct mutex		lock;
	u64			cap;