Commit 1181a462 authored by Chao Leng, committed by Zheng Zengkai
Browse files

nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout



stable inclusion
from stable-5.10.14
commit 3b87f9b0624595ec452154efcdca38ddc195147a
bugzilla: 48051

--------------------------------

[ Upstream commit 7674073b ]

A crash happens when fault injection delays request completion for a long
time (nearly 30s). Each namespace has a request queue; when completions are
delayed this long, multiple request queues may have timed-out requests at
the same time, so nvme_rdma_timeout can execute concurrently. Requests from
different request queues may be queued on the same RDMA queue, so multiple
concurrent nvme_rdma_timeout calls may invoke nvme_rdma_stop_queue on the
same RDMA queue at the same time. The first nvme_rdma_timeout clears
NVME_RDMA_Q_LIVE and proceeds to stop the RDMA queue (draining the QP), but
the others see NVME_RDMA_Q_LIVE already cleared and directly complete the
requests. Completing a request before the QP is fully drained can lead to a
use-after-free condition.

Add a mutex lock to serialize nvme_rdma_stop_queue.

Signed-off-by: Chao Leng <lengchao@huawei.com>
Tested-by: Israel Rukshin <israelr@nvidia.com>
Reviewed-by: Israel Rukshin <israelr@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
parent 8835bbf4
Loading
Loading
Loading
Loading
+11 −4
Original line number Diff line number Diff line
@@ -97,6 +97,7 @@ struct nvme_rdma_queue {
	struct completion	cm_done;
	bool			pi_support;
	int			cq_size;
	struct mutex		queue_lock;
};

struct nvme_rdma_ctrl {
@@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
	int ret;

	queue = &ctrl->queues[idx];
	mutex_init(&queue->queue_lock);
	queue->ctrl = ctrl;
	if (idx && ctrl->ctrl.max_integrity_segments)
		queue->pi_support = true;
@@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
	if (IS_ERR(queue->cm_id)) {
		dev_info(ctrl->ctrl.device,
			"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
		return PTR_ERR(queue->cm_id);
		ret = PTR_ERR(queue->cm_id);
		goto out_destroy_mutex;
	}

	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
@@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
out_destroy_cm_id:
	rdma_destroy_id(queue->cm_id);
	nvme_rdma_destroy_queue_ib(queue);
out_destroy_mutex:
	mutex_destroy(&queue->queue_lock);
	return ret;
}

@@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)

/*
 * Stop an RDMA queue exactly once, even when called concurrently from
 * multiple nvme_rdma_timeout handlers.
 *
 * queue_lock serializes the teardown: only the caller that observes and
 * clears NVME_RDMA_Q_LIVE performs __nvme_rdma_stop_queue() (which drains
 * the QP); any concurrent caller blocks on the mutex until the drain has
 * completed, so requests are never completed while the QP is still being
 * drained (which could otherwise cause a use-after-free).
 */
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	mutex_lock(&queue->queue_lock);
	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
		__nvme_rdma_stop_queue(queue);
	mutex_unlock(&queue->queue_lock);
}

static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)

	nvme_rdma_destroy_queue_ib(queue);
	rdma_destroy_id(queue->cm_id);
	mutex_destroy(&queue->queue_lock);
}

static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)