Unverified commit 7d450053, authored by openeuler-ci-bot, committed by Gitee
Browse files

!11942 sbitmap: backport bugfix patches

Merge Pull Request from: @ci-robot 
 
PR sync from: Yu Kuai <yukuai3@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/V3LCUNO32U6O2ZB5NOOGFS536B6BWXJ4/ 
Hugh Dickins (1):
  sbitmap: fix lockup while swapping

Jan Kara (1):
  sbitmap: Avoid leaving waitqueue in invalid state in __sbq_wake_up()

Kemeng Shi (3):
  sbitmap: correct wake_batch recalculation to avoid potential IO hung
  blk-mq: wait on correct sbitmap_queue in blk_mq_mark_tag_wait
  blk-mq: Fix potential io hung for shared sbitmap per tagset

Laibin Qiu (2):
  blk-mq: fix tag_get wait task can't be awakened
  blk-mq: Fix wrong wakeup batch configuration which will cause hang

Li Lingfeng (1):
  block: Fix lockdep warning in blk_mq_mark_tag_wait

Yu Kuai (2):
  blk-mq: fix potential io hang by wrong 'wake_batch'
  sbitmap: fix possible io hung due to lost wakeup


-- 
2.39.2
 
https://gitee.com/openeuler/kernel/issues/IAQPKU 
 
Link:https://gitee.com/openeuler/kernel/pulls/11942

 

Reviewed-by: Ye Weihua <yeweihua4@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parents 17befaf8 65267b13
Loading
Loading
Loading
Loading
+38 −8
Original line number Diff line number Diff line
@@ -18,6 +18,21 @@

#define BLK_MQ_DTAG_WAIT_EXPIRE (5 * HZ)

/*
 * Refresh the wakeup batch of both tag bitmaps (normal and reserved) for
 * the given number of users sharing @tags. A user count of zero is
 * ignored and leaves both bitmaps untouched.
 */
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
		unsigned int users)
{
	if (users) {
		sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
						     users);
		sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
						     users);
	}
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before try to allocate driver tag, then even if fail
@@ -26,18 +41,29 @@
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	unsigned int users;
	unsigned long flags;
	struct blk_mq_tags *tags = hctx->tags;

	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			atomic_inc(&hctx->tags->active_queues);
		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
			return true;
		}
	} else {
		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			atomic_inc(&hctx->tags->active_queues);
		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
			return true;
		}
	}

	spin_lock_irqsave(&tags->lock, flags);
	users = atomic_inc_return(&tags->active_queues);
	blk_mq_update_wake_batch(tags, users);
	spin_unlock_irqrestore(&tags->lock, flags);

	return true;
}

@@ -58,6 +84,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;
	unsigned int users;

	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
		struct request_queue *q = hctx->queue;
@@ -65,13 +92,16 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
					&q->queue_flags))
			return;
		atomic_dec(&tags->active_queues);
	} else {
		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return;
		atomic_dec(&tags->active_queues);
	}

	spin_lock_irq(&tags->lock);
	users = atomic_dec_return(&tags->active_queues);
	blk_mq_update_wake_batch(tags, users);
	spin_unlock_irq(&tags->lock);

	blk_mq_tag_wakeup_all(tags, false);
}

+9 −3
Original line number Diff line number Diff line
@@ -1240,12 +1240,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
				 struct request *rq)
{
	struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
	struct sbitmap_queue *sbq;
	struct wait_queue_head *wq;
	wait_queue_entry_t *wait;
	bool ret;

	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
	    !(blk_mq_is_sbitmap_shared(hctx->flags))) {
		blk_mq_sched_mark_restart_hctx(hctx);

		/*
@@ -1263,6 +1264,10 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
	if (!list_empty_careful(&wait->entry))
		return false;

	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag))
		sbq = &hctx->tags->breserved_tags;
	else
		sbq = &hctx->tags->bitmap_tags;
	wq = &bt_wait_ptr(sbq, hctx)->wait;

	spin_lock_irq(&wq->lock);
@@ -1520,7 +1525,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
		bool needs_restart;
		/* For non-shared tags, the RESTART check will suffice */
		bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
			(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
			((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
			blk_mq_is_sbitmap_shared(hctx->flags));

		blk_mq_release_budgets(q, nr_budgets);

+11 −0
Original line number Diff line number Diff line
@@ -381,6 +381,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
	sbitmap_free(&sbq->sb);
}

/**
 * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
 * @sbq: Bitmap queue whose wake batch is recalculated.
 * @users: Number of users sharing the bitmap queue; expected to be non-zero,
 *         as the bitmap depth is divided by this value.
 *
 * Like sbitmap_queue_update_wake_batch(), this derives the wake batch from
 * a depth, but the depth used is the bitmap depth divided by @users
 * (rounded up). The result is clamped to the range [1, SBQ_WAKE_BATCH].
 * This interface is for HCTX shared tags or queue shared tags.
 */
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int users);

/**
 * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
 * @sbq: Bitmap queue to resize.
+70 −27
Original line number Diff line number Diff line
@@ -395,10 +395,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int depth)
static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int wake_batch)
{
	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
	int i;

	if (sbq->wake_batch != wake_batch) {
@@ -414,6 +413,27 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
	}
}

/*
 * Derive the wake batch for @depth with sbq_calc_wake_batch() and hand it
 * to __sbitmap_queue_update_wake_batch() to be applied to @sbq.
 */
static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int depth)
{
	__sbitmap_queue_update_wake_batch(sbq,
					  sbq_calc_wake_batch(sbq, depth));
}

/*
 * Recalculate the wake batch of @sbq for tags shared by @users users.
 *
 * The bitmap depth is divided by @users (rounding up) to get a per-user
 * depth; the wake batch is that depth divided by SBQ_WAIT_QUEUES, clamped
 * to the range [1, SBQ_WAKE_BATCH]. The result is applied through
 * __sbitmap_queue_update_wake_batch().
 *
 * A @users value of zero is ignored: the current wake batch is kept.
 */
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int users)
{
	unsigned int wake_batch;
	unsigned int depth;

	/*
	 * This is an exported interface, so do not rely on every caller
	 * filtering out zero: the division below would otherwise trap.
	 */
	if (!users)
		return;

	/* Per-user share of the bitmap depth, rounded up. */
	depth = (sbq->sb.depth + users - 1) / users;

	wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
			1, SBQ_WAKE_BATCH);
	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
	sbitmap_queue_update_wake_batch(sbq, depth);
@@ -499,7 +519,7 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait)) {
		if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) {
			if (wake_index != atomic_read(&sbq->wake_index))
				atomic_set(&sbq->wake_index, wake_index);
			return ws;
@@ -516,40 +536,63 @@ static bool __sbq_wake_up(struct sbitmap_queue *sbq)
	struct sbq_wait_state *ws;
	unsigned int wake_batch;
	int wait_cnt;
	bool ret;

	ws = sbq_wake_ptr(sbq);
	if (!ws)
		return false;

	wait_cnt = atomic_dec_return(&ws->wait_cnt);
	if (wait_cnt <= 0) {
		int ret;
	/*
	 * For concurrent callers of this, callers should call this function
	 * again to wakeup a new batch on a different 'ws'.
	 */
	if (wait_cnt < 0)
		return true;

	/*
	 * If we decremented queue without waiters, retry to avoid lost
	 * wakeups.
	 */
	if (wait_cnt > 0)
		return !waitqueue_active(&ws->wait);

	/*
	 * When wait_cnt == 0, we have to be particularly careful as we are
	 * responsible to reset wait_cnt regardless whether we've actually
	 * woken up anybody. But in case we didn't wakeup anybody, we still
	 * need to retry.
	 */
	ret = !waitqueue_active(&ws->wait);
	wake_batch = READ_ONCE(sbq->wake_batch);

	/*
	 * Wake up first in case that concurrent callers decrease wait_cnt
	 * while waitqueue is empty.
	 */
	wake_up_nr(&ws->wait, wake_batch);

	/*
	 * Pairs with the memory barrier in sbitmap_queue_resize() to
	 * ensure that we see the batch size update before the wait
	 * count is reset.
	 *
	 * Also pairs with the implicit barrier between decrementing wait_cnt
	 * and checking for waitqueue_active() to make sure waitqueue_active()
	 * sees result of the wakeup if atomic_dec_return() has seen the result
	 * of atomic_set().
	 */
	smp_mb__before_atomic();

	/*
		 * For concurrent callers of this, the one that failed the
		 * atomic_cmpxhcg() race should call this function again
		 * to wakeup a new batch on a different 'ws'.
	 * Increase wake_index before updating wait_cnt, otherwise concurrent
	 * callers can see valid wait_cnt in old waitqueue, which can cause
	 * invalid wakeup on the old waitqueue.
	 */
		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
		if (ret == wait_cnt) {
	sbq_index_atomic_inc(&sbq->wake_index);
			wake_up_nr(&ws->wait, wake_batch);
			return false;
		}
	atomic_set(&ws->wait_cnt, wake_batch);

		return true;
	}

	return false;
	return ret;
}

void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)