Commit eed9a328 authored by Yu Zhao, committed by Andrew Morton
Browse files

mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG

Some architectures support the accessed bit in non-leaf PMD entries, e.g.,
x86 sets the accessed bit in a non-leaf PMD entry when using it as part of
linear address translation [1].  Page table walkers that clear the
accessed bit may use this capability to reduce their search space.

Note that:
1. Although an inline function is preferable, this capability is added
   as a configuration option for consistency with the existing macros.
2. Due to the little interest in other varieties, this capability was
   only tested on Intel and AMD CPUs.

Thanks to the following developers for their efforts [2][3].
  Randy Dunlap <rdunlap@infradead.org>
  Stephen Rothwell <sfr@canb.auug.org.au>

[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
     Volume 3 (June 2021), section 4.8
[2] https://lore.kernel.org/r/bfdcc7c8-922f-61a9-aa15-7e7250f04af7@infradead.org/
[3] https://lore.kernel.org/r/20220413151513.5a0d7a7e@canb.auug.org.au/

Link: https://lkml.kernel.org/r/20220918080010.2920238-3-yuzhao@google.com


Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent e1fd09e3
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1415,6 +1415,14 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
	bool

config ARCH_HAS_NONLEAF_PMD_YOUNG
	bool
	help
	  Architectures that select this option are capable of setting the
	  accessed bit in non-leaf PMD entries when using them as part of linear
	  address translations. Page table walkers that clear the accessed bit
	  may use this capability to reduce their search space.

source "kernel/gcov/Kconfig"

source "scripts/gcc-plugins/Kconfig"
+1 −0
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ config X86
	select ARCH_HAS_PMEM_API		if X86_64
	select ARCH_HAS_PTE_DEVMAP		if X86_64
	select ARCH_HAS_PTE_SPECIAL
	select ARCH_HAS_NONLEAF_PMD_YOUNG	if PGTABLE_LEVELS > 2
	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
	select ARCH_HAS_COPY_MC			if X86_64
	select ARCH_HAS_SET_MEMORY
+2 −1
Original line number Diff line number Diff line
@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)

static inline int pmd_bad(pmd_t pmd)
{
	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
	return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
	       (_KERNPG_TABLE & ~_PAGE_ACCESSED);
}

static inline unsigned long pages_to_mb(unsigned long npg)
+4 −1
Original line number Diff line number Diff line
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
	return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,

	return ret;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
+2 −2
Original line number Diff line number Diff line
@@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
@@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH