Unverified commit 27f522d6, authored by openeuler-ci-bot, committed by Gitee
Browse files

!7196 v3 Backport folio feature and bugfix

Merge Pull Request from: @ci-robot 
 
PR sync from: Liu Shixin <liushixin2@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/DH3GQZ4QIAUKR3SUFNHS5E76M2BAWTHJ/ 
Folio feature:

1. Patch series "Split a folio to any lower order folios", v5.
2. arm64: mm: swap: support THP_SWAP on hardware with MTE
3. Patch series "support multi-size THP numa balancing", v2.
4. Patch series "Swap-out mTHP without splitting", v7.

Other improvements and bugfixes:
1. mm: fix draining remote pageset
2. Patch series "Address some contpte nits".
3. Patch series "mm: page_alloc: fixes for high atomic reserve calculations", v3.
4. Patch series "Fix I/O high when memory almost met memcg limit", v2.
5. mm, oom:dump_tasks add rss detailed information printing
6. mm: ratelimit stat flush from workingset shrinker
7. mm: madvise: pageout: ignore references rather than clearing young
8. mm: use memalloc_nofs_save() in page_cache_ra_order()
9. mm/vmalloc: fix return value of vb_alloc if size is 0
10. mm: remove struct page from get_shadow_from_swap_cache

Baolin Wang (2):
  mm: factor out the numa mapping rebuilding into a new helper
  mm: support multi-size THP numa balancing

Barry Song (5):
  mm: madvise: pageout: ignore references rather than clearing young
  arm64: mm: swap: support THP_SWAP on hardware with MTE
  madvise:madvise_cold_or_pageout_pte_range(): allow split while
    folio_estimated_sharers = 0
  mm: hold PTL from the first PTE while reclaiming a large folio
  mm: alloc_anon_folio: avoid doing vma_thp_gfp_mask in fallback cases

Charan Teja Kalla (2):
  mm: page_alloc: correct high atomic reserve calculations
  mm: page_alloc: enforce minimum zone size to do high atomic reserves

David Hildenbrand (1):
  mm: convert folio_estimated_sharers() to folio_likely_mapped_shared()

Hailong.Liu (1):
  mm/vmalloc: fix return value of vb_alloc if size is 0

Huang Ying (1):
  mm: fix draining remote pageset

Jiexun Wang (1):
  mm/madvise: add cond_resched() in madvise_cold_or_pageout_pte_range()

John Hubbard (1):
  huge_memory.c: document huge page splitting rules more thoroughly

Kefeng Wang (2):
  mm: use memalloc_nofs_save() in page_cache_ra_order()
  mm: huge_memory: use more folio api in __split_huge_page_tail()

Liu Shixin (2):
  mm/readahead: break read-ahead loop if filemap_add_folio return
    -ENOMEM
  mm/filemap: don't decrease mmap_miss when folio has workingset flag

Matthew Wilcox (Oracle) (4):
  mm: support order-1 folios in the page cache
  XArray: set the marks correctly when splitting an entry
  mm: remove struct page from get_shadow_from_swap_cache
  mm: remove PageAnonExclusive assertions in unuse_pte()

Muhammad Usama Anjum (2):
  selftests/mm: split_huge_page_test: conform test to TAP format output
  selftests: mm: fix unused and uninitialized variable warning

Ryan Roberts (9):
  arm64/mm: export contpte symbols only to GPL users
  arm64/mm: improve comment in contpte_ptep_get_lockless()
  mm: swap: remove CLUSTER_FLAG_HUGE from swap_cluster_info:flags
  mm: swap: free_swap_and_cache_nr() as batched free_swap_and_cache()
  mm: swap: simplify struct percpu_cluster
  mm: swap: update get_swap_pages() to take folio order
  mm: swap: allow storage of all mTHP orders
  mm: vmscan: avoid split during shrink_folio_list()
  mm: madvise: avoid split during MADV_PAGEOUT and MADV_COLD

Sergey Senozhatsky (1):
  mm/madvise: don't forget to leave lazy MMU mode in
    madvise_cold_or_pageout_pte_range()

Shakeel Butt (1):
  mm: ratelimit stat flush from workingset shrinker

Yong Wang (1):
  mm, oom:dump_tasks add rss detailed information printing

Zi Yan (9):
  mm/huge_memory: only split PMD mapping when necessary in unmap_folio()
  mm/memcg: use order instead of nr in split_page_memcg()
  mm/page_owner: use order instead of nr in split_page_owner()
  mm: memcg: make memcg huge page split support any order split
  mm: page_owner: add support for splitting to any order in split
    page_owner
  mm: thp: split huge page to any lower order pages
  mm: huge_memory: enable debugfs to split huge pages to any order
  mm/huge_memory: check new folio order when split a folio
  mm/migrate: split source folio if it is on deferred split list


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I9OCYO 
 
Link: https://gitee.com/openeuler/kernel/pulls/7196

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents 782a2fa8 f9bb3fcd
Loading
Loading
Loading
Loading
+2 −17
Original line number Diff line number Diff line
@@ -45,12 +45,6 @@
	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool arch_thp_swp_supported(void)
{
	return !system_supports_mte();
}
#define arch_thp_swp_supported arch_thp_swp_supported

/*
 * Outside of a few very special situations (e.g. hibernation), we always
 * use broadcast TLB invalidation instructions, therefore a spurious page
@@ -1095,12 +1089,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#ifdef CONFIG_ARM64_MTE

#define __HAVE_ARCH_PREPARE_TO_SWAP
static inline int arch_prepare_to_swap(struct page *page)
{
	if (system_supports_mte())
		return mte_save_tags(page);
	return 0;
}
extern int arch_prepare_to_swap(struct folio *folio);

#define __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
@@ -1116,11 +1105,7 @@ static inline void arch_swap_invalidate_area(int type)
}

#define __HAVE_ARCH_SWAP_RESTORE
static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
	if (system_supports_mte())
		mte_restore_tags(entry, &folio->page);
}
extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);

#endif /* CONFIG_ARM64_MTE */

+25 −21
Original line number Diff line number Diff line
@@ -135,7 +135,7 @@ void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
	pte = pte_mkcont(pte);
	contpte_convert(mm, addr, orig_ptep, pte);
}
EXPORT_SYMBOL(__contpte_try_fold);
EXPORT_SYMBOL_GPL(__contpte_try_fold);

void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
			pte_t *ptep, pte_t pte)
@@ -150,7 +150,7 @@ void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
	pte = pte_mknoncont(pte);
	contpte_convert(mm, addr, ptep, pte);
}
EXPORT_SYMBOL(__contpte_try_unfold);
EXPORT_SYMBOL_GPL(__contpte_try_unfold);

pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte)
{
@@ -178,21 +178,25 @@ pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte)

	return orig_pte;
}
EXPORT_SYMBOL(contpte_ptep_get);
EXPORT_SYMBOL_GPL(contpte_ptep_get);

pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
{
	/*
	 * Gather access/dirty bits, which may be populated in any of the ptes
	 * of the contig range. We may not be holding the PTL, so any contiguous
	 * range may be unfolded/modified/refolded under our feet. Therefore we
	 * ensure we read a _consistent_ contpte range by checking that all ptes
	 * in the range are valid and have CONT_PTE set, that all pfns are
	 * contiguous and that all pgprots are the same (ignoring access/dirty).
	 * If we find a pte that is not consistent, then we must be racing with
	 * an update so start again. If the target pte does not have CONT_PTE
	 * set then that is considered consistent on its own because it is not
	 * part of a contpte range.
	 * The ptep_get_lockless() API requires us to read and return *orig_ptep
	 * so that it is self-consistent, without the PTL held, so we may be
	 * racing with other threads modifying the pte. Usually a READ_ONCE()
	 * would suffice, but for the contpte case, we also need to gather the
	 * access and dirty bits from across all ptes in the contiguous block,
	 * and we can't read all of those neighbouring ptes atomically, so any
	 * contiguous range may be unfolded/modified/refolded under our feet.
	 * Therefore we ensure we read a _consistent_ contpte range by checking
	 * that all ptes in the range are valid and have CONT_PTE set, that all
	 * pfns are contiguous and that all pgprots are the same (ignoring
	 * access/dirty). If we find a pte that is not consistent, then we must
	 * be racing with an update so start again. If the target pte does not
	 * have CONT_PTE set then that is considered consistent on its own
	 * because it is not part of a contpte range.
	 */

	pgprot_t orig_prot;
@@ -231,7 +235,7 @@ pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)

	return orig_pte;
}
EXPORT_SYMBOL(contpte_ptep_get_lockless);
EXPORT_SYMBOL_GPL(contpte_ptep_get_lockless);

void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
					pte_t *ptep, pte_t pte, unsigned int nr)
@@ -274,7 +278,7 @@ void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,

	} while (addr != end);
}
EXPORT_SYMBOL(contpte_set_ptes);
EXPORT_SYMBOL_GPL(contpte_set_ptes);

void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full)
@@ -282,7 +286,7 @@ void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
	contpte_try_unfold_partial(mm, addr, ptep, nr);
	__clear_full_ptes(mm, addr, ptep, nr, full);
}
EXPORT_SYMBOL(contpte_clear_full_ptes);
EXPORT_SYMBOL_GPL(contpte_clear_full_ptes);

pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
@@ -291,7 +295,7 @@ pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
	contpte_try_unfold_partial(mm, addr, ptep, nr);
	return __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
}
EXPORT_SYMBOL(contpte_get_and_clear_full_ptes);
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);

int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep)
@@ -316,7 +320,7 @@ int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,

	return young;
}
EXPORT_SYMBOL(contpte_ptep_test_and_clear_young);
EXPORT_SYMBOL_GPL(contpte_ptep_test_and_clear_young);

int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep)
@@ -337,7 +341,7 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,

	return young;
}
EXPORT_SYMBOL(contpte_ptep_clear_flush_young);
EXPORT_SYMBOL_GPL(contpte_ptep_clear_flush_young);

void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
					pte_t *ptep, unsigned int nr)
@@ -355,7 +359,7 @@ void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
	contpte_try_unfold_partial(mm, addr, ptep, nr);
	__wrprotect_ptes(mm, addr, ptep, nr);
}
EXPORT_SYMBOL(contpte_wrprotect_ptes);
EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);

int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep,
@@ -401,4 +405,4 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,

	return 1;
}
EXPORT_SYMBOL(contpte_ptep_set_access_flags);
EXPORT_SYMBOL_GPL(contpte_ptep_set_access_flags);
+45 −0
Original line number Diff line number Diff line
@@ -68,6 +68,13 @@ void mte_invalidate_tags(int type, pgoff_t offset)
	mte_free_tag_storage(tags);
}

/*
 * Page-based wrapper around mte_invalidate_tags().
 *
 * Looks up the swap entry of @page with page_swap_entry() and forwards that
 * entry's type and offset to mte_invalidate_tags().
 */
static inline void __mte_invalidate_tags(struct page *page)
{
	const swp_entry_t swp = page_swap_entry(page);

	mte_invalidate_tags(swp_type(swp), swp_offset(swp));
}

void mte_invalidate_tags_area(int type)
{
	swp_entry_t entry = swp_entry(type, 0);
@@ -83,3 +90,41 @@ void mte_invalidate_tags_area(int type)
	}
	xa_unlock(&mte_pages);
}

/*
 * arch_prepare_to_swap() - save the tags of every page in @folio.
 *
 * Returns 0 immediately when system_supports_mte() is false. Otherwise
 * mte_save_tags() is called for each page of the folio in index order. As
 * soon as one call returns non-zero, __mte_invalidate_tags() is run on the
 * pages that were already handled (most recent first) and that non-zero
 * value is handed back to the caller.
 *
 * Return: 0 on success, otherwise the first non-zero result of
 * mte_save_tags().
 */
int arch_prepare_to_swap(struct folio *folio)
{
	long idx, nr_pages;
	int ret = 0;

	if (!system_supports_mte())
		return 0;

	nr_pages = folio_nr_pages(folio);

	for (idx = 0; idx < nr_pages; idx++) {
		ret = mte_save_tags(folio_page(folio, idx));
		if (ret)
			break;
	}

	/* The loop ran to completion: every page was saved. */
	if (idx == nr_pages)
		return 0;

	/* Page @idx failed; undo pages idx - 1 down to 0. */
	while (idx-- > 0)
		__mte_invalidate_tags(folio_page(folio, idx));

	return ret;
}

/*
 * arch_swap_restore() - restore the tags of every page in @folio.
 *
 * Does nothing when system_supports_mte() is false. Otherwise
 * mte_restore_tags() is called once per page in index order; @entry is used
 * for the first page and entry.val is incremented by one for each
 * following page.
 */
void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
	long idx, nr_pages;

	if (!system_supports_mte())
		return;

	nr_pages = folio_nr_pages(folio);

	/* Step the swap entry in lockstep with the page index. */
	for (idx = 0; idx < nr_pages; idx++, entry.val++)
		mte_restore_tags(entry, folio_page(folio, idx));
}
+1 −1
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ static ssize_t swap_pages_write(struct file *file, const char __user *buf,
	}

	if (!list_empty(&pagelist))
		reclaim_pages(&pagelist);
		reclaim_pages(&pagelist, false);

	ret = count;
	kfree(data_ptr_res);
+12 −19
Original line number Diff line number Diff line
@@ -266,10 +266,11 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,

void folio_prep_large_rmappable(struct folio *folio);
bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list(struct page *page, struct list_head *list);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order);
static inline int split_huge_page(struct page *page)
{
	return split_huge_page_to_list(page, NULL);
	return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio);

@@ -423,7 +424,8 @@ can_split_folio(struct folio *folio, int *pextra_pins)
	return false;
}
static inline int
split_huge_page_to_list(struct page *page, struct list_head *list)
split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		unsigned int new_order)
{
	return 0;
}
@@ -520,27 +522,18 @@ static inline bool thp_migration_supported(void)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline int split_folio_to_list(struct folio *folio,
		struct list_head *list)
static inline int split_folio_to_list_to_order(struct folio *folio,
		struct list_head *list, int new_order)
{
	return split_huge_page_to_list(&folio->page, list);
	return split_huge_page_to_list_to_order(&folio->page, list, new_order);
}

static inline int split_folio(struct folio *folio)
static inline int split_folio_to_order(struct folio *folio, int new_order)
{
	return split_folio_to_list(folio, NULL);
	return split_folio_to_list_to_order(folio, NULL, new_order);
}

/*
 * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
 * limitations in the implementation like arm64 MTE can override this to
 * false
 */
#ifndef arch_thp_swp_supported
static inline bool arch_thp_swp_supported(void)
{
	return true;
}
#endif
#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
#define split_folio(f) split_folio_to_order(f, 0)

#endif /* _LINUX_HUGE_MM_H */
Loading