Commit c082e2e5 authored by Lance Yang, committed by Liu Shixin
Browse files

mm/memory: add any_dirty optional pointer to folio_pte_batch()

mainline inclusion
from mainline-v6.10-rc1
commit 96ebdb032096f67e37b582cd2ea2558c402f878b
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAIHQO

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=96ebdb032096f67e37b582cd2ea2558c402f878b

--------------------------------

This commit adds the any_dirty pointer as an optional parameter to the
folio_pte_batch() function.  By using both the any_young and any_dirty
pointers, madvise_free can make smarter decisions about whether to clear
the PTEs when marking large folios as lazyfree.

Link: https://lkml.kernel.org/r/20240418134435.6092-4-ioworker0@gmail.com


Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Barry Song <21cnbao@gmail.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
parent 06ecf5ab
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -131,6 +131,8 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
 *		  first one is writable.
 * @any_young: Optional pointer to indicate whether any entry except the
 *		  first one is young.
 * @any_dirty: Optional pointer to indicate whether any entry except the
 *		  first one is dirty.
 *
 * Detect a PTE batch: consecutive (present) PTEs that map consecutive
 * pages of the same large folio.
@@ -146,18 +148,20 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
 */
static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
		pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
		bool *any_writable, bool *any_young)
		bool *any_writable, bool *any_young, bool *any_dirty)
{
	unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
	const pte_t *end_ptep = start_ptep + max_nr;
	pte_t expected_pte, *ptep;
	bool writable, young;
	bool writable, young, dirty;
	int nr;

	if (any_writable)
		*any_writable = false;
	if (any_young)
		*any_young = false;
	if (any_dirty)
		*any_dirty = false;

	VM_WARN_ON_FOLIO(!pte_present(pte), folio);
	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
@@ -173,6 +177,8 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
			writable = !!pte_write(pte);
		if (any_young)
			young = !!pte_young(pte);
		if (any_dirty)
			dirty = !!pte_dirty(pte);
		pte = __pte_batch_clear_ignored(pte, flags);

		if (!pte_same(pte, expected_pte))
@@ -190,6 +196,8 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
			*any_writable |= writable;
		if (any_young)
			*any_young |= young;
		if (any_dirty)
			*any_dirty |= dirty;

		nr = pte_batch_hint(ptep, pte);
		expected_pte = pte_advance_pfn(expected_pte, nr);
+14 −5
Original line number Diff line number Diff line
@@ -365,6 +365,18 @@ static inline bool can_do_file_pageout(struct vm_area_struct *vma)
	       file_permission(vma->vm_file, MAY_WRITE) == 0;
}

/*
 * madvise_folio_pte_batch - folio_pte_batch() wrapper for the madvise walkers.
 * @addr:      Virtual address that corresponds to @ptep.
 * @end:       End of the range being walked; the batch is capped at the
 *             number of pages between @addr and @end.
 * @folio:     Folio forwarded to folio_pte_batch().
 * @ptep:      Pointer to the first page table entry of the candidate batch.
 * @pte:       Value of the entry at @ptep.
 * @any_young: Optional (may be NULL); forwarded to folio_pte_batch() to
 *             report whether any entry except the first one is young.
 * @any_dirty: Optional (may be NULL); forwarded to folio_pte_batch() to
 *             report whether any entry except the first one is dirty.
 *
 * Dirty and soft-dirty bits are ignored while comparing entries, and the
 * any_writable output of folio_pte_batch() is not requested.
 *
 * Return: whatever folio_pte_batch() returns (callers in this file store it
 * as the number of batched entries).
 */
static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end,
					  struct folio *folio, pte_t *ptep,
					  pte_t pte, bool *any_young,
					  bool *any_dirty)
{
	/* Upper bound for the batch: PTE slots left in [addr, end). */
	int nr_slots = (end - addr) / PAGE_SIZE;
	const fpb_t ignore_mask = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;

	return folio_pte_batch(folio, addr, ptep, pte, nr_slots, ignore_mask,
			       NULL, any_young, any_dirty);
}

static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct mm_walk *walk)
@@ -500,13 +512,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
		 * next pte in the range.
		 */
		if (folio_test_large(folio)) {
			const fpb_t fpb_flags = FPB_IGNORE_DIRTY |
						FPB_IGNORE_SOFT_DIRTY;
			int max_nr = (end - addr) / PAGE_SIZE;
			bool any_young;

			nr = folio_pte_batch(folio, addr, pte, ptent, max_nr,
					     fpb_flags, NULL, &any_young);
			nr = madvise_folio_pte_batch(addr, end, folio, pte,
						     ptent, &any_young, NULL);
			if (any_young)
				ptent = pte_mkyoung(ptent);

+2 −2
Original line number Diff line number Diff line
@@ -996,7 +996,7 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
			flags |= FPB_IGNORE_SOFT_DIRTY;

		nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
				     &any_writable, NULL);
				     &any_writable, NULL, NULL);
		folio_ref_add(folio, nr);
		if (folio_test_anon(folio)) {
			if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page,
@@ -1563,7 +1563,7 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
	 */
	if (unlikely(folio_test_large(folio) && max_nr != 1)) {
		nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
				     NULL, NULL);
				     NULL, NULL, NULL);

		zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
				       addr, details, rss, force_flush,