mm/rmap: split migration into its own function (a98a2f0c) · Commits · EulixOS / Software / Kernel

include/linux/rmap.h

+1 −3

Original line number	Diff line number	Diff line
		@@ -86,8 +86,6 @@ struct anon_vma_chain {
		};

		enum ttu_flags {
		TTU_MIGRATION = 0x1, /* migration mode */

		TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
		TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
		TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */
		@@ -97,7 +95,6 @@ enum ttu_flags {
		* do a final flush if necessary */
		TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
		* caller holds it */
		TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
		};

		#ifdef CONFIG_MMU
		@@ -194,6 +191,7 @@ static inline void page_dup_rmap(struct page *page, bool compound)
		int page_referenced(struct page *, int is_locked,
		struct mem_cgroup memcg, unsigned long vm_flags);

		void try_to_migrate(struct page *page, enum ttu_flags flags);
		void try_to_unmap(struct page *, enum ttu_flags flags);

		/* Avoid racy checks */

mm/huge_memory.c

+10 −6

Original line number	Diff line number	Diff line
		@@ -2309,16 +2309,20 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,

		static void unmap_page(struct page *page)
		{
		enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK \| TTU_SYNC \|
		TTU_RMAP_LOCKED \| TTU_SPLIT_HUGE_PMD;
		enum ttu_flags ttu_flags = TTU_RMAP_LOCKED \| TTU_SPLIT_HUGE_PMD \|
		TTU_SYNC;

		VM_BUG_ON_PAGE(!PageHead(page), page);

		/* If TTU_SPLIT_FREEZE is ever extended to file, update remap_page() */
		/*
		* Anon pages need migration entries to preserve them, but file
		* pages can simply be left unmapped, then faulted back on demand.
		* If that is ever changed (perhaps for mlock), update remap_page().
		*/
		if (PageAnon(page))
		ttu_flags \|= TTU_SPLIT_FREEZE;

		try_to_unmap(page, ttu_flags);
		try_to_migrate(page, ttu_flags);
		else
		try_to_unmap(page, ttu_flags \| TTU_IGNORE_MLOCK);

		VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
		}

mm/migrate.c

+4 −5

Original line number	Diff line number	Diff line
		@@ -1109,7 +1109,7 @@ static int __unmap_and_move(struct page page, struct page newpage,
		/* Establish migration ptes */
		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
		page);
		try_to_unmap(page, TTU_MIGRATION\|TTU_IGNORE_MLOCK);
		try_to_migrate(page, 0);
		page_was_mapped = 1;
		}

		@@ -1311,7 +1311,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,

		if (page_mapped(hpage)) {
		bool mapping_locked = false;
		enum ttu_flags ttu = TTU_MIGRATION\|TTU_IGNORE_MLOCK;
		enum ttu_flags ttu = 0;

		if (!PageAnon(hpage)) {
		/*
		@@ -1328,7 +1328,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
		ttu \|= TTU_RMAP_LOCKED;
		}

		try_to_unmap(hpage, ttu);
		try_to_migrate(hpage, ttu);
		page_was_mapped = 1;

		if (mapping_locked)
		@@ -2602,7 +2602,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
		*/
		static void migrate_vma_unmap(struct migrate_vma *migrate)
		{
		int flags = TTU_MIGRATION \| TTU_IGNORE_MLOCK;
		const unsigned long npages = migrate->npages;
		const unsigned long start = migrate->start;
		unsigned long addr, i, restore = 0;
		@@ -2614,7 +2613,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
		continue;

		if (page_mapped(page)) {
		try_to_unmap(page, flags);
		try_to_migrate(page, 0);
		if (page_mapped(page))
		goto restore;
		}

mm/rmap.c

+274 −93

Original line number	Diff line number	Diff line
		@@ -1411,14 +1411,8 @@ static bool try_to_unmap_one(struct page page, struct vm_area_struct vma,
		if (flags & TTU_SYNC)
		pvmw.flags = PVMW_SYNC;

		if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
		is_zone_device_page(page) && !is_device_private_page(page))
		return true;

		if (flags & TTU_SPLIT_HUGE_PMD) {
		split_huge_pmd_address(vma, address,
		flags & TTU_SPLIT_FREEZE, page);
		}
		if (flags & TTU_SPLIT_HUGE_PMD)
		split_huge_pmd_address(vma, address, false, page);

		/*
		* For THP, we have to assume the worst case, i.e. pmd, for invalidation.
		@@ -1443,16 +1437,6 @@ static bool try_to_unmap_one(struct page page, struct vm_area_struct vma,
		mmu_notifier_invalidate_range_start(&range);

		while (page_vma_mapped_walk(&pvmw)) {
		#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
		VM_BUG_ON_PAGE(PageHuge(page) \|\| !PageTransCompound(page), page);

		set_pmd_migration_entry(&pvmw, page);
		continue;
		}
		#endif

		/*
		* If the page is mlock()d, we cannot swap it out.
		* If it's recently referenced (perhaps page_referenced
		@@ -1514,46 +1498,6 @@ static bool try_to_unmap_one(struct page page, struct vm_area_struct vma,
		}
		}

		if (IS_ENABLED(CONFIG_MIGRATION) &&
		(flags & TTU_MIGRATION) &&
		is_zone_device_page(page)) {
		swp_entry_t entry;
		pte_t swp_pte;

		pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);

		/*
		* Store the pfn of the page in a special migration
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		entry = make_readable_migration_entry(page_to_pfn(page));
		swp_pte = swp_entry_to_pte(entry);

		/*
		* pteval maps a zone device page and is therefore
		* a swap pte.
		*/
		if (pte_swp_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_swp_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
		/*
		* No need to invalidate here it will synchronize on
		* against the special swap migration pte.
		*
		* The assignment to subpage above was computed from a
		* swap PTE which results in an invalid pointer.
		* Since only PAGE_SIZE pages can currently be
		* migrated, just set it to page. This will need to be
		* changed when hugepage migrations to device private
		* memory are supported.
		*/
		subpage = page;
		goto discard;
		}

		/* Nuke the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
		if (should_defer_flush(mm, flags)) {
		@@ -1606,39 +1550,6 @@ static bool try_to_unmap_one(struct page page, struct vm_area_struct vma,
		/* We have to invalidate as we cleared the pte */
		mmu_notifier_invalidate_range(mm, address,
		address + PAGE_SIZE);
		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
		(flags & (TTU_MIGRATION\|TTU_SPLIT_FREEZE))) {
		swp_entry_t entry;
		pte_t swp_pte;

		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
		set_pte_at(mm, address, pvmw.pte, pteval);
		ret = false;
		page_vma_mapped_walk_done(&pvmw);
		break;
		}

		/*
		* Store the pfn of the page in a special migration
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		if (pte_write(pteval))
		entry = make_writable_migration_entry(
		page_to_pfn(subpage));
		else
		entry = make_readable_migration_entry(
		page_to_pfn(subpage));
		swp_pte = swp_entry_to_pte(entry);
		if (pte_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		set_pte_at(mm, address, pvmw.pte, swp_pte);
		/*
		* No need to invalidate here it will synchronize on
		* against the special swap migration pte.
		*/
		} else if (PageAnon(page)) {
		swp_entry_t entry = { .val = page_private(subpage) };
		pte_t swp_pte;
		@@ -1766,6 +1677,277 @@ void try_to_unmap(struct page *page, enum ttu_flags flags)
		.anon_lock = page_lock_anon_vma_read,
		};

		if (flags & TTU_RMAP_LOCKED)
		rmap_walk_locked(page, &rwc);
		else
		rmap_walk(page, &rwc);
		}

		/*
		* @arg: enum ttu_flags will be passed to this argument.
		*
		* If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
		* containing migration entries. TTU_SPLIT_HUGE_PMD, TTU_RMAP_LOCKED and
		* TTU_SYNC are the only supported flags.
		*/
		static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
		unsigned long address, void *arg)
		{
		struct mm_struct *mm = vma->vm_mm;
		struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
		};
		pte_t pteval;
		struct page *subpage;
		bool ret = true;
		struct mmu_notifier_range range;
		enum ttu_flags flags = (enum ttu_flags)(long)arg;

		if (is_zone_device_page(page) && !is_device_private_page(page))
		return true;

		/*
		* When racing against e.g. zap_pte_range() on another cpu,
		* in between its ptep_get_and_clear_full() and page_remove_rmap(),
		* try_to_migrate() may return before page_mapped() has become false,
		* if page table locking is skipped: use TTU_SYNC to wait for that.
		*/
		if (flags & TTU_SYNC)
		pvmw.flags = PVMW_SYNC;

		/*
		* unmap_page() in mm/huge_memory.c is the only user of migration with
		* TTU_SPLIT_HUGE_PMD and it wants to freeze.
		*/
		if (flags & TTU_SPLIT_HUGE_PMD)
		split_huge_pmd_address(vma, address, true, page);

		/*
		* For THP, we have to assume the worst case, i.e. pmd, for invalidation.
		* For hugetlb, it could be much worse if we need to do pud
		* invalidation in the case of pmd sharing.
		*
		* Note that the page cannot be freed in this function as the caller of
		* try_to_migrate() must hold a reference on the page.
		*/
		range.end = PageKsm(page) ?
		address + PAGE_SIZE : vma_address_end(page, vma);
		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
		address, range.end);
		if (PageHuge(page)) {
		/*
		* If sharing is possible, start and end will be adjusted
		* accordingly.
		*/
		adjust_range_if_pmd_sharing_possible(vma, &range.start,
		&range.end);
		}
		mmu_notifier_invalidate_range_start(&range);

		while (page_vma_mapped_walk(&pvmw)) {
		#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
		VM_BUG_ON_PAGE(PageHuge(page) \|\|
		!PageTransCompound(page), page);

		set_pmd_migration_entry(&pvmw, page);
		continue;
		}
		#endif

		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_PAGE(!pvmw.pte, page);

		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
		address = pvmw.address;

		if (PageHuge(page) && !PageAnon(page)) {
		/*
		* To call huge_pmd_unshare, i_mmap_rwsem must be
		* held in write mode. Caller needs to explicitly
		* do this outside rmap routines.
		*/
		VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
		if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
		/*
		* huge_pmd_unshare unmapped an entire PMD
		* page. There is no way of knowing exactly
		* which PMDs may be cached for this mm, so
		* we must flush them all. start/end were
		* already adjusted above to cover this range.
		*/
		flush_cache_range(vma, range.start, range.end);
		flush_tlb_range(vma, range.start, range.end);
		mmu_notifier_invalidate_range(mm, range.start,
		range.end);

		/*
		* The ref count of the PMD page was dropped
		* which is part of the way map counting
		* is done for shared PMDs. Return 'true'
		* here. When there is no other sharing,
		* huge_pmd_unshare returns false and we will
		* unmap the actual page and drop map count
		* to zero.
		*/
		page_vma_mapped_walk_done(&pvmw);
		break;
		}
		}

		/* Nuke the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
		pteval = ptep_clear_flush(vma, address, pvmw.pte);

		/* Move the dirty bit to the page. Now the pte is gone. */
		if (pte_dirty(pteval))
		set_page_dirty(page);

		/* Update high watermark before we lower rss */
		update_hiwater_rss(mm);

		if (is_zone_device_page(page)) {
		swp_entry_t entry;
		pte_t swp_pte;

		/*
		* Store the pfn of the page in a special migration
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		entry = make_readable_migration_entry(
		page_to_pfn(page));
		swp_pte = swp_entry_to_pte(entry);

		/*
		* pteval maps a zone device page and is therefore
		* a swap pte.
		*/
		if (pte_swp_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_swp_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
		/*
		* No need to invalidate here; it will synchronize
		* against the special swap migration pte.
		*
		* The assignment to subpage above was computed from a
		* swap PTE which results in an invalid pointer.
		* Since only PAGE_SIZE pages can currently be
		* migrated, just set it to page. This will need to be
		* changed when hugepage migrations to device private
		* memory are supported.
		*/
		subpage = page;
		} else if (PageHWPoison(page)) {
		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
		if (PageHuge(page)) {
		hugetlb_count_sub(compound_nr(page), mm);
		set_huge_swap_pte_at(mm, address,
		pvmw.pte, pteval,
		vma_mmu_pagesize(vma));
		} else {
		dec_mm_counter(mm, mm_counter(page));
		set_pte_at(mm, address, pvmw.pte, pteval);
		}

		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
		/*
		* The guest indicated that the page content is of no
		* interest anymore. Simply discard the pte, vmscan
		* will take care of the rest.
		* A future reference will then fault in a new zero
		* page. When userfaultfd is active, we must not drop
		* this page though, as its main user (postcopy
		* migration) will not expect userfaults on already
		* copied pages.
		*/
		dec_mm_counter(mm, mm_counter(page));
		/* We have to invalidate as we cleared the pte */
		mmu_notifier_invalidate_range(mm, address,
		address + PAGE_SIZE);
		} else {
		swp_entry_t entry;
		pte_t swp_pte;

		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
		set_pte_at(mm, address, pvmw.pte, pteval);
		ret = false;
		page_vma_mapped_walk_done(&pvmw);
		break;
		}

		/*
		* Store the pfn of the page in a special migration
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		if (pte_write(pteval))
		entry = make_writable_migration_entry(
		page_to_pfn(subpage));
		else
		entry = make_readable_migration_entry(
		page_to_pfn(subpage));

		swp_pte = swp_entry_to_pte(entry);
		if (pte_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		set_pte_at(mm, address, pvmw.pte, swp_pte);
		/*
		* No need to invalidate here; it will synchronize
		* against the special swap migration pte.
		*/
		}

		/*
		* No need to call mmu_notifier_invalidate_range(): it has been
		* done above for all cases requiring it to happen under page
		* table lock before mmu_notifier_invalidate_range_end()
		*
		* See Documentation/vm/mmu_notifier.rst
		*/
		page_remove_rmap(subpage, PageHuge(page));
		put_page(page);
		}

		mmu_notifier_invalidate_range_end(&range);

		return ret;
		}

		/**
		* try_to_migrate - try to replace all page table mappings with swap entries
		* @page: the page to replace page table entries for
		* @flags: action and flags
		*
		* Tries to remove all the page table entries which are mapping this page and
		* replace them with special swap entries. Caller must hold the page lock.
		*
		* The function returns nothing; callers check page_mapped() afterwards to
		* find out whether every mapping was replaced.
		*/
		void try_to_migrate(struct page *page, enum ttu_flags flags)
		{
		struct rmap_walk_control rwc = {
		.rmap_one = try_to_migrate_one,
		.arg = (void *)flags,
		.done = page_not_mapped,
		.anon_lock = page_lock_anon_vma_read,
		};

		/*
		* Migration always ignores mlock and supports only the TTU_RMAP_LOCKED,
		* TTU_SPLIT_HUGE_PMD and TTU_SYNC flags.
		*/
		if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED \| TTU_SPLIT_HUGE_PMD \|
		TTU_SYNC)))
		return;

		/*
		* During exec, a temporary VMA is setup and later moved.
		* The VMA is moved under the anon_vma lock but not the
		@@ -1774,8 +1956,7 @@ void try_to_unmap(struct page *page, enum ttu_flags flags)
		* locking requirements of exec(), migration skips
		* temporary VMAs until after exec() completes.
		*/
		if ((flags & (TTU_MIGRATION\|TTU_SPLIT_FREEZE))
		&& !PageKsm(page) && PageAnon(page))
		if (!PageKsm(page) && PageAnon(page))
		rwc.invalid_vma = invalid_migration_vma;

		if (flags & TTU_RMAP_LOCKED)