Commit bb5e0f58 authored by Baokun Li
Browse files

blk-wbt: don't throttle swap writes in direct reclaim

mainline inclusion
from mainline-v6.11-rc1
commit 4e63aeb5d0101ddada36a2f64f048e2f9d2202fc
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IA8D5J

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4e63aeb5d0101ddada36a2f64f048e2f9d2202fc



--------------------------------

Now we avoid throttling swap writes by determining whether the current
process is kswapd (aka current_is_kswapd()), but swap writes can come
from either kswapd or direct reclaim, so the swap writes from direct
reclaim will still be throttled.

When a process holds a lock to allocate a free page, and enters direct
reclaim because there is no free memory, then it might trigger a hang
due to the wbt throttling that causes other processes to fail to get
the lock.

Both kswapd and direct reclaim set the REQ_SWAP flag, so use REQ_SWAP
instead of current_is_kswapd() to avoid throttling swap writes. Also
rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to WBT_RWQ_SWAP.

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20240604030522.3686177-1-libaokun@huaweicloud.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
parent fbff37d5
Loading
Loading
Loading
Loading
+9 −9
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@
enum wbt_flags {
	WBT_TRACKED		= 1,	/* write, tracked for throttling */
	WBT_READ		= 2,	/* read */
	WBT_KSWAPD		= 4,	/* write, from kswapd */
	WBT_SWAP		= 4,	/* write, from swap_writepage() */
	WBT_DISCARD		= 8,	/* discard */

	WBT_NR_BITS		= 4,	/* number of bits */
@@ -44,7 +44,7 @@ enum wbt_flags {

enum {
	WBT_RWQ_BG		= 0,
	WBT_RWQ_KSWAPD,
	WBT_RWQ_SWAP,
	WBT_RWQ_DISCARD,
	WBT_NUM_RWQ,
};
@@ -173,8 +173,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	if (wb_acct & WBT_KSWAPD)
		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
	if (wb_acct & WBT_SWAP)
		return &rwb->rq_wait[WBT_RWQ_SWAP];
	else if (wb_acct & WBT_DISCARD)
		return &rwb->rq_wait[WBT_RWQ_DISCARD];

@@ -529,7 +529,7 @@ static bool close_io(struct rq_wb *rwb)
		time_before(now, rwb->last_comp + HZ / 10);
}

#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)

static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
@@ -540,13 +540,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)

	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then
	 * swap trying to free memory, or REQ_SYNC is set, then
	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
	 * that. If the write is marked as a background write, then use
	 * the idle limit, or go to normal if we haven't had competing
	 * IO for a bit.
	 */
	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
	if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
		limit = rwb->rq_depth.max_depth;
	else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
		/*
@@ -623,8 +623,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
	if (bio_op(bio) == REQ_OP_READ) {
		flags = WBT_READ;
	} else if (wbt_should_throttle(bio)) {
		if (current_is_kswapd())
			flags |= WBT_KSWAPD;
		if (bio->bi_opf & REQ_SWAP)
			flags |= WBT_SWAP;
		if (bio_op(bio) == REQ_OP_DISCARD)
			flags |= WBT_DISCARD;
		flags |= WBT_TRACKED;