Commit e9c07c32 authored by Lance Yang, committed by Liu Shixin
Browse files

mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop

mainline inclusion
from mainline-v6.11-rc1
commit 29e847d2ade3cdff36afe095fdbeb9b5f71a197a
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAIHQO

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=29e847d2ade3cdff36afe095fdbeb9b5f71a197a

--------------------------------

In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
folios, start the pagewalk first, then call split_huge_pmd_address() to
split the folio.

Link: https://lkml.kernel.org/r/20240614015138.31461-3-ioworker0@gmail.com


Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
parent 3dd65353
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -419,6 +419,9 @@ static inline bool thp_migration_supported(void)
	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}

/*
 * Split the huge PMD entry @pmd that covers @address in @vma.
 *
 * This variant takes no lock itself; it is meant for callers that already
 * hold the page table lock protecting @pmd. If @folio is non-NULL the split
 * is only performed when @pmd maps that folio. @freeze requires @folio.
 */
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmd, bool freeze, struct folio *folio);

#else /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -477,6 +480,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
		unsigned long address, bool freeze, struct folio *folio) {}
/*
 * Empty stand-in for split_huge_pmd_locked(): accepts the same arguments
 * and does nothing (presumably the !CONFIG_TRANSPARENT_HUGEPAGE variant,
 * like the neighbouring empty stubs).
 */
static inline void
split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
		      pmd_t *pmd, bool freeze, struct folio *folio)
{
}

#define split_huge_pud(__vma, __pmd, __address)	\
	do { } while (0)
+24 −0
Original line number Diff line number Diff line
@@ -697,6 +697,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
		spin_unlock(pvmw->ptl);
}

/**
 * page_vma_mapped_walk_restart - Reset a walk so it can be started over.
 * @pvmw: Walk state to reset.
 *
 * Use this after the page table changed underneath an in-progress walk
 * (for instance after a PMD was split). The page table lock recorded in
 * @pvmw is dropped and the cached lock/pmd/pte pointers are cleared, so
 * the next page_vma_mapped_walk() call begins a new walk from
 * pvmw->address.
 *
 * Warns once if the walk had neither a pmd nor a pte, or if no page table
 * lock was recorded.
 */
static inline void
page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
{
	/* A restart only makes sense for a walk that has found something. */
	WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);

	if (likely(pvmw->ptl)) {
		spin_unlock(pvmw->ptl);
		pvmw->ptl = NULL;
	} else {
		/* No lock recorded: nothing to release, but flag it. */
		WARN_ON_ONCE(1);
	}

	pvmw->pte = NULL;
	pvmw->pmd = NULL;
}

bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);

/*
+22 −20
Original line number Diff line number Diff line
@@ -2751,6 +2751,27 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
	pmd_populate(mm, pmd, pgtable);
}

/**
 * split_huge_pmd_locked - Split a huge PMD without taking any lock.
 * @vma: VMA containing @address.
 * @address: Virtual address covered by @pmd; expected to be aligned to
 *           HPAGE_PMD_SIZE (warned about otherwise).
 * @pmd: PMD entry to split.
 * @freeze: Forwarded to __split_huge_pmd_locked(); must not be set
 *          without @folio.
 * @folio: Optional folio to check @pmd against. When given it is expected
 *         to be locked and PMD-mappable, and the split only happens if
 *         @pmd currently maps exactly this folio.
 *
 * This function does no locking of its own; callers are expected to hold
 * the page table lock that protects @pmd.
 *
 * Nothing is done unless @pmd is a transparent huge, devmap or PMD
 * migration entry.
 */
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmd, bool freeze, struct folio *folio)
{
	bool pmd_migration = is_pmd_migration_entry(*pmd);

	VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
	VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
	VM_BUG_ON(freeze && !folio);

	/*
	 * When the caller requests to set up a migration entry, we
	 * require a folio to check the PMD against. Otherwise, there
	 * is a risk of replacing the wrong folio.
	 */
	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
		/*
		 * Never apply pmd_folio() to a migration entry: the PMD is
		 * not present, so the result would be a bogus folio pointer.
		 * The folio lock held by the caller guarantees that such an
		 * entry cannot refer to @folio anyway, so it is always the
		 * "wrong folio" case.
		 */
		if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
			return;
		__split_huge_pmd_locked(vma, pmd, address, freeze);
	}
}

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct folio *folio)
{
@@ -2762,26 +2783,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
				(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
	mmu_notifier_invalidate_range_start(&range);
	ptl = pmd_lock(vma->vm_mm, pmd);

	/*
	 * If caller asks to setup a migration entry, we need a folio to check
	 * pmd against. Otherwise we can end up replacing wrong folio.
	 */
	VM_BUG_ON(freeze && !folio);
	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));

	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
	    is_pmd_migration_entry(*pmd)) {
		/*
		 * It's safe to call pmd_page when folio is set because it's
		 * guaranteed that pmd is present.
		 */
		if (folio && folio != pmd_folio(*pmd))
			goto out;
		__split_huge_pmd_locked(vma, pmd, range.start, freeze);
	}

out:
	split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(&range);
}
+15 −6
Original line number Diff line number Diff line
@@ -1617,9 +1617,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
	if (flags & TTU_SYNC)
		pvmw.flags = PVMW_SYNC;

	if (flags & TTU_SPLIT_HUGE_PMD)
		split_huge_pmd_address(vma, address, false, folio);

	/*
	 * For THP, we have to assume the worse case ie pmd for invalidation.
	 * For hugetlb, it could be much worse if we need to do pud
@@ -1645,9 +1642,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
	mmu_notifier_invalidate_range_start(&range);

	while (page_vma_mapped_walk(&pvmw)) {
		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_FOLIO(!pvmw.pte, folio);

		/*
		 * If the folio is in an mlock()d vma, we must not swap it out.
		 */
@@ -1659,6 +1653,21 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
			goto walk_abort;
		}

		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
			/*
			 * We temporarily have to drop the PTL and start once
			 * again from that now-PTE-mapped page table.
			 */
			split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
					      false, folio);
			flags &= ~TTU_SPLIT_HUGE_PMD;
			page_vma_mapped_walk_restart(&pvmw);
			continue;
		}

		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_FOLIO(!pvmw.pte, folio);

		pfn = pte_pfn(ptep_get(pvmw.pte));
		subpage = folio_page(folio, pfn - folio_pfn(folio));
		address = pvmw.address;