Commit c3f4a9a2 authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds
Browse files

mm/vmscan: centralise timeout values for reclaim_throttle

Neil Brown raised concerns about callers of reclaim_throttle specifying
a timeout value.  The original timeout values to congestion_wait() were
probably pulled out of thin air or copy&pasted from somewhere else.
This patch centralises the timeout values and selects a timeout based on
the reason for reclaim throttling.  These figures are also pulled out of
the same thin air but better values may be derived

Running a workload that is throttling for inappropriate periods and
tracing mm_vmscan_throttled can be used to pick a more appropriate
value.  Excessive throttling would pick a lower timeout where as
excessive CPU usage in reclaim context would select a larger timeout.
Ideally a large value would always be used and the wakeups would occur
before a timeout but that requires careful testing.

Link: https://lkml.kernel.org/r/20211022144651.19914-7-mgorman@techsingularity.net


Signed-off-by: default avatarMel Gorman <mgorman@techsingularity.net>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 132b0d21
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
		if (cc->mode == MIGRATE_ASYNC)
			return -EAGAIN;

		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);

		if (fatal_signal_pending(current))
			return -EINTR;
+1 −2
Original line number Diff line number Diff line
@@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn;
 */
extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
								long timeout);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);

/*
 * in mm/rmap.c:
+1 −1
Original line number Diff line number Diff line
@@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
		 * guess as any.
		 */
		reclaim_throttle(NODE_DATA(numa_node_id()),
			VMSCAN_THROTTLE_WRITEBACK, HZ/50);
			VMSCAN_THROTTLE_WRITEBACK);
	}
	/*
	 * Usually few pages are written by now from those we've just submitted
+37 −13
Original line number Diff line number Diff line
@@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping,
	unlock_page(page);
}

void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
							long timeout)
void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
{
	wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
	long ret;
	bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK);
	long timeout, ret;
	DEFINE_WAIT(wait);

	/*
@@ -1023,17 +1021,43 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
	    current->flags & (PF_IO_WORKER|PF_KTHREAD))
		return;

	if (acct_writeback &&
	    atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
	/*
	 * These figures are pulled out of thin air.
	 * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
	 * parallel reclaimers which is a short-lived event so the timeout is
	 * short. Failing to make progress or waiting on writeback are
	 * potentially long-lived events so use a longer timeout. This is shaky
	 * logic as a failure to make progress could be due to anything from
	 * writeback to a slow device to excessive references pages at the tail
	 * of the inactive LRU.
	 */
	switch(reason) {
	case VMSCAN_THROTTLE_WRITEBACK:
		timeout = HZ/10;

		if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
			WRITE_ONCE(pgdat->nr_reclaim_start,
				node_page_state(pgdat, NR_THROTTLED_WRITTEN));
		}

		break;
	case VMSCAN_THROTTLE_NOPROGRESS:
		timeout = HZ/10;
		break;
	case VMSCAN_THROTTLE_ISOLATED:
		timeout = HZ/50;
		break;
	default:
		WARN_ON_ONCE(1);
		timeout = HZ;
		break;
	}

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = schedule_timeout(timeout);
	finish_wait(wqh, &wait);

	if (acct_writeback)
	if (reason == VMSCAN_THROTTLE_WRITEBACK)
		atomic_dec(&pgdat->nr_writeback_throttled);

	trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
@@ -2318,7 +2342,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,

		/* wait a bit for the reclaimer. */
		stalled = true;
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);

		/* We are about to die and free our memory. Return now. */
		if (fatal_signal_pending(current))
@@ -3250,7 +3274,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
		 * until some pages complete writeback.
		 */
		if (sc->nr.immediate)
			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
	}

	/*
@@ -3274,7 +3298,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
	if (!current_is_kswapd() && current_may_throttle() &&
	    !sc->hibernation_mode &&
	    test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);

	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
				    sc))
@@ -3346,7 +3370,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)

	/* Throttle if making no progress at high prioities. */
	if (sc->priority < DEF_PRIORITY - 2)
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
}

/*