Commit 354ed597, authored by Yu Zhao, committed by Andrew Morton
Browse files

mm: multi-gen LRU: kill switch

Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
can be disabled include:
  0x0001: the multi-gen LRU core
  0x0002: walking page table, when arch_has_hw_pte_young() returns
          true
  0x0004: clearing the accessed bit in non-leaf PMD entries, when
          CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
  [yYnN]: apply to all the components above
E.g.,
  echo y >/sys/kernel/mm/lru_gen/enabled
  cat /sys/kernel/mm/lru_gen/enabled
  0x0007
  echo 5 >/sys/kernel/mm/lru_gen/enabled
  cat /sys/kernel/mm/lru_gen/enabled
  0x0005

NB: the page table walks happen on the scale of seconds under heavy memory
pressure, in which case the mmap_lock contention is a lesser concern,
compared with the LRU lock contention and the I/O congestion.  So far the
only well-known case of the mmap_lock contention happens on Android, due
to Scudo [1] which allocates several thousand VMAs for merely a few
hundred MBs.  The SPF and the Maple Tree also have provided their own
assessments [2][3].  However, if walking page tables does worsen the
mmap_lock contention, the kill switch can be used to disable it.  In this
case the multi-gen LRU will suffer a minor performance degradation, as
shown previously.

Clearing the accessed bit in non-leaf PMD entries can also be disabled,
since this behavior was not tested on x86 varieties other than Intel and
AMD.

[1] https://source.android.com/devices/tech/debug/scudo
[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/
[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/

Link: https://lkml.kernel.org/r/20220918080010.2920238-11-yuzhao@google.com


Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent f76c8337
Loading
Loading
Loading
Loading
+14 −1
Original line number Diff line number Diff line
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
	css_put(&cgrp->self);
}

extern struct mutex cgroup_mutex;

/* Acquire cgroup_mutex; release it with cgroup_unlock(). */
static inline void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}

/* Release cgroup_mutex previously taken with cgroup_lock(). */
static inline void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}

/**
 * task_css_set_check - obtain a task's css_set with extra access conditions
 * @task: the task to obtain css_set for
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c)					\
	rcu_dereference_check((task)->cgroups,				\
@@ -708,6 +719,8 @@ struct cgroup;
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
/* No-op stubs: this variant of cgroup_lock()/cgroup_unlock() takes no lock. */
static inline void cgroup_lock(void) {}
static inline void cgroup_unlock(void) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
					 struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
+13 −2
Original line number Diff line number Diff line
@@ -106,10 +106,21 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio)

#ifdef CONFIG_LRU_GEN

/*
 * lru_gen_enabled() - test whether the multi-gen LRU core is switched on.
 *
 * Both variants return the state of the LRU_GEN_CORE entry of the
 * lru_gen_caps[] static keys. CONFIG_LRU_GEN_ENABLED only selects how the
 * keys are declared (default-true vs. default-false) and the matching
 * branch hint (likely vs. unlikely).
 */
#ifdef CONFIG_LRU_GEN_ENABLED
static inline bool lru_gen_enabled(void)
{
	/*
	 * NOTE(review): the bare "return true;" below makes the rest of this
	 * body unreachable as written; it looks like the pre-patch line kept
	 * by a diff view that lost its +/- markers -- confirm against the tree.
	 */
	return true;
	DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);

	return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
}
#else
static inline bool lru_gen_enabled(void)
{
	/* Keys default to false here, so the enabled path is hinted unlikely. */
	DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);

	return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
}
#endif

static inline bool lru_gen_in_fault(void)
{
@@ -222,7 +233,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,

	VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);

	if (folio_test_unevictable(folio))
	if (folio_test_unevictable(folio) || !lrugen->enabled)
		return false;
	/*
	 * There are three common cases for this page:
+9 −0
Original line number Diff line number Diff line
@@ -387,6 +387,13 @@ enum {
	LRU_GEN_FILE,
};

/*
 * Indices into the lru_gen_caps[] static keys, one per component.
 * NOTE(review): presumably index N corresponds to bit (1 << N) of
 * /sys/kernel/mm/lru_gen/enabled (0x0001, 0x0002, 0x0004) -- confirm.
 */
enum {
	LRU_GEN_CORE,		/* the multi-gen LRU core */
	LRU_GEN_MM_WALK,	/* presumably: walking page tables */
	LRU_GEN_NONLEAF_YOUNG,	/* presumably: clearing the accessed bit in non-leaf PMD entries */
	NR_LRU_GEN_CAPS		/* number of entries above; sizes lru_gen_caps[] */
};

#define MIN_LRU_BATCH		BITS_PER_LONG
#define MAX_LRU_BATCH		(MIN_LRU_BATCH * 64)

@@ -428,6 +435,8 @@ struct lru_gen_struct {
	/* can be modified without holding the LRU lock */
	atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
	/* whether the multi-gen LRU is enabled */
	bool enabled;
};

enum {
+0 −1
Original line number Diff line number Diff line
@@ -164,7 +164,6 @@ struct cgroup_mgctx {
#define DEFINE_CGROUP_MGCTX(name)						\
	struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)

extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
+6 −0
Original line number Diff line number Diff line
@@ -1127,6 +1127,12 @@ config LRU_GEN
	help
	  A high performance LRU implementation to overcommit memory.

# Build-time default only: selects whether lru_gen_enabled() starts out true
# or false. NOTE(review): presumably still changeable at runtime through
# /sys/kernel/mm/lru_gen/enabled -- confirm.
config LRU_GEN_ENABLED
	bool "Enable by default"
	depends on LRU_GEN
	help
	  This option enables the multi-gen LRU by default.

config LRU_GEN_STATS
	bool "Full stats for debugging"
	depends on LRU_GEN
Loading