Commit 4aaf269c authored by Juergen Gross, committed by Andrew Morton
Browse files

mm: introduce arch_has_hw_nonleaf_pmd_young()

When running as a Xen PV guest, commit eed9a328 ("mm: x86: add
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation in
pmdp_test_and_clear_young():

 BUG: unable to handle page fault for address: ffff8880083374d0
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0003) - permissions violation
 PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065
 Oops: 0003 [#1] PREEMPT SMP NOPTI
 CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1
 RIP: e030:pmdp_test_and_clear_young+0x25/0x40

This happens because the Xen hypervisor can't emulate direct writes to
page table entries other than PTEs.

This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young(),
similar to arch_has_hw_pte_young(), and testing that instead of
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.

Link: https://lkml.kernel.org/r/20221123064510.16225-1-jgross@suse.com


Fixes: eed9a328 ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG")
Signed-off-by: Juergen Gross <jgross@suse.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Acked-by: Yu Zhao <yuzhao@google.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Acked-by: David Hildenbrand <david@redhat.com>	[core changes]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 6617da8f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1439,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
	return true;
}

#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
/*
 * Report whether the accessed bit in non-leaf PMD entries may be used.
 *
 * Xen PV guests must not rely on it: the hypervisor cannot emulate direct
 * writes to page table entries other than PTEs, so clearing the bit in a
 * PMD entry ends in a protection violation.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	if (cpu_feature_enabled(X86_FEATURE_XENPV))
		return false;

	return true;
}
#endif

#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
+11 −0
Original line number Diff line number Diff line
@@ -267,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef arch_has_hw_nonleaf_pmd_young
/*
 * Generic fallback, compiled only when no arch_has_hw_nonleaf_pmd_young
 * macro has been defined beforehand.
 *
 * Tells the caller whether the accessed bit in non-leaf PMD entries is
 * supported on the local CPU; here the answer is decided solely by
 * CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.
 */
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
		return false;

	return true;
}
#endif

#ifndef arch_has_hw_pte_young
/*
 * Return whether the accessed bit is supported on the local CPU.
+5 −5
Original line number Diff line number Diff line
@@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
			goto next;

		if (!pmd_trans_huge(pmd[i])) {
			if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
			if (arch_has_hw_nonleaf_pmd_young() &&
			    get_cap(LRU_GEN_NONLEAF_YOUNG))
				pmdp_test_and_clear_young(vma, addr, pmd + i);
			goto next;
@@ -4085,14 +4085,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
#endif
		walk->mm_stats[MM_NONLEAF_TOTAL]++;

#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
		if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
		if (arch_has_hw_nonleaf_pmd_young() &&
		    get_cap(LRU_GEN_NONLEAF_YOUNG)) {
			if (!pmd_young(val))
				continue;

			walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
		}
#endif

		if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
			continue;

@@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
	if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
		caps |= BIT(LRU_GEN_MM_WALK);

	if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
	if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
		caps |= BIT(LRU_GEN_NONLEAF_YOUNG);

	return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);