Unverified commit e59105d1, authored by openeuler-ci-bot, committed by Gitee
Browse files

!11040 mm: lazyfree THP support

Merge Pull Request from: @ci-robot 
 
PR sync from: Liu Shixin <liushixin2@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/2RWKONOJ3F7BHHQ25ZGXPYFP4GPAQW5S/ 
Andrew Morton (1):
  mm/huge_memory.c: fix used-uninitialized

Barry Song (1):
  mm: arm64: fix the out-of-bounds issue in
    contpte_clear_young_dirty_ptes

Lance Yang (7):
  mm/madvise: introduce clear_young_dirty_ptes() batch helper
  mm/arm64: override clear_young_dirty_ptes() batch helper
  mm/memory: add any_dirty optional pointer to folio_pte_batch()
  mm/madvise: optimize lazyfreeing with mTHP in madvise_free
  mm/rmap: remove duplicated exit code in pagewalk loop
  mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop
  mm/vmscan: avoid split lazyfree THP during shrink_folio_list()

Matthew Wilcox (Oracle) (1):
  mm: add pmd_folio()

Peter Xu (2):
  mm/Kconfig: CONFIG_PGTABLE_HAS_HUGE_LEAVES
  mm: make HPAGE_PXD_* macros even if !THP


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/IAIHQO 
 
Link:https://gitee.com/openeuler/kernel/pulls/11040

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents e65c0786 f0db1328
Loading
Loading
Loading
Loading
+55 −0
Original line number Diff line number Diff line
@@ -1036,6 +1036,46 @@ static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
		__ptep_set_wrprotect(mm, address, ptep);
}

/*
 * Clear the young and/or dirty state, as selected by @flags, of the single
 * PTE at @ptep using a compare-and-exchange retry loop.
 *
 * @pte is the caller's snapshot of *@ptep and is used as the first expected
 * value. If the exchange finds a different value in the entry, that value
 * becomes the new starting point and the update is recomputed and retried.
 * @vma and @addr are not used by this helper.
 */
static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *ptep,
					   pte_t pte, cydp_t flags)
{
	pte_t expected = pte;
	pte_t desired, seen;

	for (;;) {
		/* Derive the wanted value from what we believe is in the entry. */
		desired = expected;
		if (flags & CYDP_CLEAR_YOUNG)
			desired = pte_mkold(desired);
		if (flags & CYDP_CLEAR_DIRTY)
			desired = pte_mkclean(desired);

		pte_val(seen) = cmpxchg_relaxed(&pte_val(*ptep),
						pte_val(expected),
						pte_val(desired));

		/* The exchange took effect iff the entry held the expected value. */
		if (pte_val(seen) == pte_val(expected))
			break;

		expected = seen;
	}
}

/*
 * Clear young and/or dirty, as selected by @flags, on @nr consecutive PTEs
 * starting at @ptep (mapped at @addr).
 *
 * Each entry is read with __ptep_get(). When both flags are requested the
 * cleaned value is written back with __set_pte(); otherwise the entry is
 * handed to __clear_young_dirty_pte(). The first entry is always processed
 * before @nr is examined, so callers are expected to pass @nr >= 1.
 */
static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep,
					    unsigned int nr, cydp_t flags)
{
	pte_t entry;

	do {
		entry = __ptep_get(ptep);

		if (flags != (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
			__clear_young_dirty_pte(vma, addr, ptep, entry, flags);
		else
			__set_pte(ptep, pte_mkclean(pte_mkold(entry)));

		ptep++;
		addr += PAGE_SIZE;
	} while (--nr != 0);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
@@ -1204,6 +1244,9 @@ extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty);
/*
 * Out-of-line variant of clear_young_dirty_ptes(). The inline
 * clear_young_dirty_ptes() wrapper calls this whenever it cannot take its
 * fast path (a single PTE that does not have the contiguous bit set).
 */
extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags);

static __always_inline void contpte_try_fold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
@@ -1428,6 +1471,17 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
}

/* Defining the name as a macro lets "#ifndef clear_young_dirty_ptes" see it. */
#define clear_young_dirty_ptes clear_young_dirty_ptes
/*
 * Clear young/dirty (per @flags) on @nr PTEs starting at @ptep.
 *
 * A single entry without the contiguous bit is handled inline by
 * __clear_young_dirty_ptes(); every other case (a batch, or an entry with
 * the contiguous bit set) is delegated to contpte_clear_young_dirty_ptes().
 */
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) {
		__clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
		return;
	}

	contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}

#else /* CONFIG_ARM64_CONTPTE */

#define ptep_get				__ptep_get
@@ -1447,6 +1501,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
#define wrprotect_ptes				__wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags
#define clear_young_dirty_ptes			__clear_young_dirty_ptes

#endif /* CONFIG_ARM64_CONTPTE */

+29 −0
Original line number Diff line number Diff line
@@ -361,6 +361,35 @@ void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);

void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    unsigned int nr, cydp_t flags)
{
	/*
	 * From the architecture's perspective, access/dirty can be cleared
	 * safely without unfolding, even when contpte is set. When the
	 * requested range begins or ends partway through a contpte block,
	 * simply widen it to cover the whole block. That is more than the
	 * core-mm asked for, but core-mm tracks access/dirty per folio
	 * rather than per page, and a contpte block is only ever created
	 * when a single folio covers it, so clearing access/dirty for the
	 * entire block is acceptable.
	 */
	unsigned long range_start = addr;
	unsigned long range_end = addr + nr * PAGE_SIZE;
	unsigned int nr_ptes;

	/* Inspect the last requested entry before ptep may be realigned. */
	if (pte_cont(__ptep_get(ptep + nr - 1)))
		range_end = ALIGN(range_end, CONT_PTE_SIZE);

	if (pte_cont(__ptep_get(ptep))) {
		range_start = ALIGN_DOWN(range_start, CONT_PTE_SIZE);
		ptep = contpte_align_down(ptep);
	}

	/* Both bounds are byte addresses; convert the span to a PTE count. */
	nr_ptes = (range_end - range_start) / PAGE_SIZE;
	__clear_young_dirty_ptes(vma, range_start, ptep, nr_ptes, flags);
}
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);

int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep,
					pte_t entry, int dirty)
+30 −14
Original line number Diff line number Diff line
@@ -69,9 +69,6 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
				  enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

/*
 * Mask of all large folio orders supported for anonymous THP; all orders up to
 * and including PMD_ORDER, except order-0 (which is not "huge") and order-1
@@ -102,14 +99,25 @@ extern struct kobj_attribute shmem_enabled_attr;
/*
 * Single-order form of thp_vma_allowable_orders(): passes BIT(order) as the
 * orders argument and collapses the result to 0 or 1 with "!!".
 */
#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PUD_SHIFT PUD_SHIFT
#else
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#endif

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
#define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)

#define HPAGE_PUD_SHIFT PUD_SHIFT
#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)
#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT)
#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER)
#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
@@ -405,14 +413,12 @@ static inline bool thp_migration_supported(void)
	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}

#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmd, bool freeze, struct folio *folio);
bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
			   pmd_t *pmdp, struct folio *folio);

#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
#else /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool folio_test_pmd_mappable(struct folio *folio)
{
@@ -470,6 +476,16 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio) {}
/* Empty stub: accepts the same arguments as the real function and does nothing. */
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
					  unsigned long address, bool freeze,
					  struct folio *folio)
{
}
/* Empty stub: accepts the same arguments as the real function and does nothing. */
static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmd,
					 bool freeze, struct folio *folio)
{
}

/*
 * Stub variant: performs no work and always reports false to the caller.
 */
static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
					 unsigned long addr, pmd_t *pmdp,
					 struct folio *folio)
{
	return false;
}

/*
 * No-op form of split_huge_pud(): expands to an empty do/while(0) statement,
 * so none of the arguments are evaluated.
 */
#define split_huge_pud(__vma, __pmd, __address)	\
	do { } while (0)
+9 −0
Original line number Diff line number Diff line
@@ -1346,6 +1346,15 @@ enum fault_flag {

typedef unsigned int __bitwise zap_flags_t;

/*
 * Flags for clear_young_dirty_ptes(). Each flag is a distinct bit, so the
 * two may be OR-ed together to clear both young and dirty in one call.
 */
typedef int __bitwise cydp_t;

/* Clear the access bit (the PTE is made "old"). */
#define CYDP_CLEAR_YOUNG		((__force cydp_t)BIT(0))

/* Clear the dirty bit (the PTE is made "clean"). */
#define CYDP_CLEAR_DIRTY		((__force cydp_t)BIT(1))

/*
 * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
 * other. Here is what they mean, and how to use them:
+46 −30
Original line number Diff line number Diff line
@@ -50,6 +50,8 @@
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

/* Shorthand for page_folio(pmd_page(pmd)): the folio of the page pmd_page() yields. */
#define pmd_folio(pmd) page_folio(pmd_page(pmd))

/*
 * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
 *
@@ -333,36 +335,6 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
}
#endif

#ifndef mkold_ptes
/**
 * mkold_ptes - Clear the young state of a run of PTEs mapping one folio.
 * @vma: VMA the pages are mapped into.
 * @addr: Virtual address at which the first page is mapped.
 * @ptep: Pointer to the first page table entry of the run.
 * @nr: Number of consecutive entries to mark old; the first entry is always
 *      processed before @nr is checked.
 *
 * Architectures may provide their own version; this fallback simply calls
 * ptep_test_and_clear_young() on each entry in turn.
 *
 * Only the PFNs of the entries need to be consecutive; other PTE bits may
 * differ across the range (some entries may be write-protected, for example).
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline void mkold_ptes(struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, unsigned int nr)
{
	do {
		ptep_test_and_clear_young(vma, addr, ptep);
		ptep++;
		addr += PAGE_SIZE;
	} while (--nr != 0);
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
@@ -473,6 +445,50 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#endif

#ifndef clear_young_dirty_ptes
/**
 * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
 *		same folio as old/clean.
 * @vma: VMA the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries to mark old/clean.  The first entry is always
 *      processed before @nr is checked.
 * @flags: Flags to modify the PTE batch semantics (CYDP_CLEAR_YOUNG and/or
 *         CYDP_CLEAR_DIRTY).
 *
 * May be overridden by the architecture; otherwise, when only
 * CYDP_CLEAR_YOUNG is requested each pte is handled by
 * ptep_test_and_clear_young(), and for any other @flags value each pte in
 * the range goes through get_and_clear/modify/set.
 *
 * Note that PTE bits in the PTE range besides the PFN can differ. For example,
 * some PTEs might be write-protected.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio.  The PTEs are all in the same PMD.
 */
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	pte_t pte;

	for (;;) {
		if (flags == CYDP_CLEAR_YOUNG) {
			ptep_test_and_clear_young(vma, addr, ptep);
		} else {
			/* Take the entry out, adjust the copy, then reinstall it. */
			pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
			if (flags & CYDP_CLEAR_YOUNG)
				pte = pte_mkold(pte);
			if (flags & CYDP_CLEAR_DIRTY)
				pte = pte_mkclean(pte);
			set_pte_at(vma->vm_mm, addr, ptep, pte);
		}
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}
#endif

static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
Loading