blk-mq: fix issue with shared tag queue re-running (eb619fdb) · Commits · 方亚芬 / raspberrypi_linux

block/blk-mq-debugfs.c

+0 −1

Original line number	Original line	Diff line number	Diff line
	@@ -179,7 +179,6 @@ static const char *const hctx_state_name[] = {
	HCTX_STATE_NAME(STOPPED),		HCTX_STATE_NAME(STOPPED),
	HCTX_STATE_NAME(TAG_ACTIVE),		HCTX_STATE_NAME(TAG_ACTIVE),
	HCTX_STATE_NAME(SCHED_RESTART),		HCTX_STATE_NAME(SCHED_RESTART),
	HCTX_STATE_NAME(TAG_WAITING),
	HCTX_STATE_NAME(START_ON_RUN),		HCTX_STATE_NAME(START_ON_RUN),
	};		};
	#undef HCTX_STATE_NAME		#undef HCTX_STATE_NAME

block/blk-mq.c

+48 −37

Original line number	Original line	Diff line number	Diff line
	@@ -998,41 +998,55 @@ done:
	return rq->tag != -1;		return rq->tag != -1;
	}		}

	static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,		static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
	void *key)		int flags, void *key)
	{		{
	struct blk_mq_hw_ctx *hctx;		struct blk_mq_hw_ctx *hctx;

	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);		hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);

	list_del(&wait->entry);		list_del_init(&wait->entry);
	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
	blk_mq_run_hw_queue(hctx, true);		blk_mq_run_hw_queue(hctx, true);
	return 1;		return 1;
	}		}

	static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)		static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
			struct request *rq)
	{		{
			struct blk_mq_hw_ctx *this_hctx = *hctx;
			wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
	struct sbq_wait_state *ws;		struct sbq_wait_state *ws;

			if (!list_empty_careful(&wait->entry))
			return false;

			spin_lock(&this_hctx->lock);
			if (!list_empty(&wait->entry)) {
			spin_unlock(&this_hctx->lock);
			return false;
			}

			ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
			add_wait_queue(&ws->wait, wait);

	/*		/*
	* The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.		* It's possible that a tag was freed in the window between the
	* The thread which wins the race to grab this bit adds the hardware		* allocation failure and adding the hardware queue to the wait
	* queue to the wait queue.		* queue.
	*/		*/
	if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||		if (!blk_mq_get_driver_tag(rq, hctx, false)) {
	test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))		spin_unlock(&this_hctx->lock);
	return false;		return false;
			}
	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);

	/*		/*
	* As soon as this returns, it's no longer safe to fiddle with		* We got a tag, remove ourselves from the wait queue to ensure
	* hctx->dispatch_wait, since a completion can wake up the wait queue		* someone else gets the wakeup.
	* and unlock the bit.
	*/		*/
	add_wait_queue(&ws->wait, &hctx->dispatch_wait);		spin_lock_irq(&ws->wait.lock);
			list_del_init(&wait->entry);
			spin_unlock_irq(&ws->wait.lock);
			spin_unlock(&this_hctx->lock);
	return true;		return true;
	}		}

	@@ -1041,6 +1055,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	{		{
	struct blk_mq_hw_ctx *hctx;		struct blk_mq_hw_ctx *hctx;
	struct request *rq, *nxt;		struct request *rq, *nxt;
			bool no_tag = false;
	int errors, queued;		int errors, queued;

	if (list_empty(list))		if (list_empty(list))
	@@ -1060,22 +1075,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	if (!blk_mq_get_driver_tag(rq, &hctx, false)) {		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
	/*		/*
	* The initial allocation attempt failed, so we need to		* The initial allocation attempt failed, so we need to
	* rerun the hardware queue when a tag is freed.		* rerun the hardware queue when a tag is freed. The
			* waitqueue takes care of that. If the queue is run
			* before we add this entry back on the dispatch list,
			* we'll re-run it below.
	*/		*/
	if (!blk_mq_dispatch_wait_add(hctx)) {		if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
	if (got_budget)
	blk_mq_put_dispatch_budget(hctx);
	break;
	}

	/*
	* It's possible that a tag was freed in the window
	* between the allocation failure and adding the
	* hardware queue to the wait queue.
	*/
	if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
	if (got_budget)		if (got_budget)
	blk_mq_put_dispatch_budget(hctx);		blk_mq_put_dispatch_budget(hctx);
			no_tag = true;
	break;		break;
	}		}
	}		}
	@@ -1140,10 +1148,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	* it is no longer set that means that it was cleared by another		* it is no longer set that means that it was cleared by another
	* thread and hence that a queue rerun is needed.		* thread and hence that a queue rerun is needed.
	*		*
	* If TAG_WAITING is set that means that an I/O scheduler has		* If 'no_tag' is set, that means that we failed getting
	* been configured and another thread is waiting for a driver		* a driver tag with an I/O scheduler attached. If our dispatch
	* tag. To guarantee fairness, do not rerun this hardware queue		* waitqueue is no longer active, ensure that we run the queue
	* but let the other thread grab the driver tag.		* AFTER adding our entries back to the list.
	*		*
	* If no I/O scheduler has been configured it is possible that		* If no I/O scheduler has been configured it is possible that
	* the hardware queue got stopped and restarted before requests		* the hardware queue got stopped and restarted before requests
	@@ -1155,8 +1163,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq		* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
	* and dm-rq.		* and dm-rq.
	*/		*/
	if (!blk_mq_sched_needs_restart(hctx) &&		if (!blk_mq_sched_needs_restart(hctx) ||
	!test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))		(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
	blk_mq_run_hw_queue(hctx, true);		blk_mq_run_hw_queue(hctx, true);
	}		}

	@@ -2020,6 +2028,9 @@ static int blk_mq_init_hctx(struct request_queue *q,

	hctx->nr_ctx = 0;		hctx->nr_ctx = 0;

			init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
			INIT_LIST_HEAD(&hctx->dispatch_wait.entry);

	if (set->ops->init_hctx &&		if (set->ops->init_hctx &&
	set->ops->init_hctx(hctx, set->driver_data, hctx_idx))		set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
	goto free_bitmap;		goto free_bitmap;

include/linux/blk-mq.h

+2 −3

Original line number	Original line	Diff line number	Diff line
	@@ -181,8 +181,7 @@ enum {
	BLK_MQ_S_STOPPED = 0,		BLK_MQ_S_STOPPED = 0,
	BLK_MQ_S_TAG_ACTIVE = 1,		BLK_MQ_S_TAG_ACTIVE = 1,
	BLK_MQ_S_SCHED_RESTART = 2,		BLK_MQ_S_SCHED_RESTART = 2,
	BLK_MQ_S_TAG_WAITING = 3,		BLK_MQ_S_START_ON_RUN = 3,
	BLK_MQ_S_START_ON_RUN = 4,

	BLK_MQ_MAX_DEPTH = 10240,		BLK_MQ_MAX_DEPTH = 10240,