mm/gup: retire follow_hugetlb_page() (48498071) · Commits · EulixOS / Software / Kernel

fs/userfaultfd.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -427,7 +427,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
		*
		* We also don't do userfault handling during
		* coredumping. hugetlbfs has the special
		* follow_hugetlb_page() to skip missing pages in the
		* hugetlb_follow_page_mask() to skip missing pages in the
		* FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
		* the no_page_table() helper in follow_page_mask(), but the
		* shmem_vm_ops->fault method is invoked even during

include/linux/hugetlb.h

+0 −12

Original line number	Diff line number	Diff line
		@@ -133,9 +133,6 @@ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
		/*
		 * hugetlb page lookup helper; follow_page_mask() hands hugetlb VMAs
		 * to it.  Returns a page pointer.
		 */
		struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				unsigned long address, unsigned int flags,
				unsigned int *page_mask);
		/*
		 * hugetlb leg of __get_user_pages(): the caller passes the addresses of
		 * its running start address and remaining page count so both can be
		 * updated in place.
		 */
		long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
				struct page **, unsigned long *, unsigned long *,
				long, unsigned int, int *);
		void unmap_hugepage_range(struct vm_area_struct *,
		unsigned long, unsigned long, struct page *,
		zap_flags_t);
		@@ -305,15 +302,6 @@ static inline struct page *hugetlb_follow_page_mask(
		BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
		}

		/*
		 * Stub variant of follow_hugetlb_page().  It is not expected to be
		 * reached: it traps with BUG(), and the trailing return only exists to
		 * satisfy the non-void return type.
		 *
		 * The parameter types mirror the real implementation so that callers
		 * (which pass the addresses of their position and page counters) build
		 * against either variant.
		 * NOTE(review): presumably the !CONFIG_HUGETLB_PAGE branch of the
		 * header, like the neighbouring stubs - confirm.
		 */
		static inline long follow_hugetlb_page(struct mm_struct *mm,
				struct vm_area_struct *vma, struct page **pages,
				unsigned long *position, unsigned long *nr_pages,
				long i, unsigned int flags, int *nonblocking)
		{
			BUG();
			return 0;
		}

		static inline int copy_hugetlb_page_range(struct mm_struct *dst,
		struct mm_struct *src,
		struct vm_area_struct *dst_vma,

mm/gup.c

+0 −19

Original line number	Diff line number	Diff line
		@@ -819,9 +819,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
		* Call hugetlb_follow_page_mask for hugetlb vmas as it will use
		* special hugetlb page table walking code. This eliminates the
		* need to check for hugetlb entries in the general walking code.
		*
		* hugetlb_follow_page_mask is only for follow_page() handling here.
		* Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
		*/
		if (is_vm_hugetlb_page(vma))
		return hugetlb_follow_page_mask(vma, address, flags,
		@@ -1221,22 +1218,6 @@ static long __get_user_pages(struct mm_struct *mm,
		ret = check_vma_flags(vma, gup_flags);
		if (ret)
		goto out;

		if (is_vm_hugetlb_page(vma)) {
		i = follow_hugetlb_page(mm, vma, pages,
		&start, &nr_pages, i,
		gup_flags, locked);
		if (!*locked) {
		/*
		* We've got a VM_FAULT_RETRY
		* and we've lost mmap_lock.
		* We must stop here.
		*/
		BUG_ON(gup_flags & FOLL_NOWAIT);
		goto out;
		}
		continue;
		}
		}
		retry:
		/*

mm/hugetlb.c

+0 −224

Original line number	Diff line number	Diff line
		@@ -5721,7 +5721,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,

		/*
		* Return whether there is a pagecache page to back given address within VMA.
		* Caller follow_hugetlb_page() holds page_table_lock so we cannot lock_page.
		*/
		static bool hugetlbfs_pagecache_present(struct hstate *h,
		struct vm_area_struct *vma, unsigned long address)
		@@ -6422,37 +6421,6 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
		}
		#endif /* CONFIG_USERFAULTFD */

		/*
		 * Store nth_page(page, nr) into pages[nr] for every nr in [0, refs).
		 *
		 * @page:  first page of the run to record
		 * @vma:   accepted for the caller's convenience; not used in the body
		 * @refs:  number of entries to fill
		 * @pages: output array; when NULL nothing is written
		 */
		static void record_subpages(struct page *page, struct vm_area_struct *vma,
				int refs, struct page **pages)
		{
			int nr;

			for (nr = 0; nr < refs; nr++) {
				if (likely(pages))
					pages[nr] = nth_page(page, nr);
			}
		}

		/*
		 * Tell the caller whether the huge PTE at @pte can be used as-is or
		 * whether a fault has to be taken first.
		 *
		 * Returns true when a fault is required.  *@unshare is always written:
		 * it is true only when the fault is needed because gup_must_unshare()
		 * said so, and false in every other case.
		 */
		static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
				unsigned int flags, pte_t *pte,
				bool *unshare)
		{
			pte_t entry = huge_ptep_get(pte);

			*unshare = false;

			/* Swap-type entry: there is no page to hand out directly. */
			if (is_swap_pte(entry))
				return true;

			/* A writable mapping needs no further checks. */
			if (huge_pte_write(entry))
				return false;

			/* Not writable, but write access was requested. */
			if (flags & FOLL_WRITE)
				return true;

			/* Read access to a non-writable entry: only unsharing can force a fault. */
			if (!gup_must_unshare(vma, flags, pte_page(entry)))
				return false;

			*unshare = true;
			return true;
		}

		struct page hugetlb_follow_page_mask(struct vm_area_struct vma,
		unsigned long address, unsigned int flags,
		unsigned int *page_mask)
		@@ -6524,198 +6492,6 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
		return page;
		}

		long follow_hugetlb_page(struct mm_struct mm, struct vm_area_struct vma,
		struct page *pages, unsigned long position,
		unsigned long *nr_pages, long i, unsigned int flags,
		int *locked)
		{
		unsigned long pfn_offset;
		unsigned long vaddr = *position;
		unsigned long remainder = *nr_pages;
		struct hstate *h = hstate_vma(vma);
		int err = -EFAULT, refs;

		while (vaddr < vma->vm_end && remainder) {
		pte_t *pte;
		spinlock_t *ptl = NULL;
		bool unshare = false;
		int absent;
		struct page *page;

		/*
		* If we have a pending SIGKILL, don't keep faulting pages and
		* potentially allocating memory.
		*/
		if (fatal_signal_pending(current)) {
		remainder = 0;
		break;
		}

		hugetlb_vma_lock_read(vma);
		/*
		* Some archs (sparc64, sh*) have multiple pte_ts to
		* each hugepage. We have to make sure we get the
		* first, for the page indexing below to work.
		*
		* Note that page table lock is not held when pte is null.
		*/
		pte = hugetlb_walk(vma, vaddr & huge_page_mask(h),
		huge_page_size(h));
		if (pte)
		ptl = huge_pte_lock(h, mm, pte);
		absent = !pte \|\| huge_pte_none(huge_ptep_get(pte));

		/*
		* When coredumping, it suits get_dump_page if we just return
		* an error where there's an empty slot with no huge pagecache
		* to back it. This way, we avoid allocating a hugepage, and
		* the sparse dumpfile avoids allocating disk blocks, but its
		* huge holes still show up with zeroes where they need to be.
		*/
		if (absent && (flags & FOLL_DUMP) &&
		!hugetlbfs_pagecache_present(h, vma, vaddr)) {
		if (pte)
		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);
		remainder = 0;
		break;
		}

		/*
		* We need call hugetlb_fault for both hugepages under migration
		* (in which case hugetlb_fault waits for the migration,) and
		* hwpoisoned hugepages (in which case we need to prevent the
		* caller from accessing to them.) In order to do this, we use
		* here is_swap_pte instead of is_hugetlb_entry_migration and
		* is_hugetlb_entry_hwpoisoned. This is because it simply covers
		* both cases, and because we can't follow correct pages
		* directly from any kind of swap entries.
		*/
		if (absent \|\|
		__follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
		vm_fault_t ret;
		unsigned int fault_flags = 0;

		if (pte)
		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);

		if (flags & FOLL_WRITE)
		fault_flags \|= FAULT_FLAG_WRITE;
		else if (unshare)
		fault_flags \|= FAULT_FLAG_UNSHARE;
		if (locked) {
		fault_flags \|= FAULT_FLAG_ALLOW_RETRY \|
		FAULT_FLAG_KILLABLE;
		if (flags & FOLL_INTERRUPTIBLE)
		fault_flags \|= FAULT_FLAG_INTERRUPTIBLE;
		}
		if (flags & FOLL_NOWAIT)
		fault_flags \|= FAULT_FLAG_ALLOW_RETRY \|
		FAULT_FLAG_RETRY_NOWAIT;
		if (flags & FOLL_TRIED) {
		/*
		* Note: FAULT_FLAG_ALLOW_RETRY and
		* FAULT_FLAG_TRIED can co-exist
		*/
		fault_flags \|= FAULT_FLAG_TRIED;
		}
		ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
		if (ret & VM_FAULT_ERROR) {
		err = vm_fault_to_errno(ret, flags);
		remainder = 0;
		break;
		}
		if (ret & VM_FAULT_RETRY) {
		if (locked &&
		!(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
		*locked = 0;
		*nr_pages = 0;
		/*
		* VM_FAULT_RETRY must not return an
		* error, it will return zero
		* instead.
		*
		* No need to update "position" as the
		* caller will not check it after
		* *nr_pages is set to 0.
		*/
		return i;
		}
		continue;
		}

		pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
		page = pte_page(huge_ptep_get(pte));

		VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
		!PageAnonExclusive(page), page);

		/*
		* If subpage information not requested, update counters
		* and skip the same_page loop below.
		*/
		if (!pages && !pfn_offset &&
		(vaddr + huge_page_size(h) < vma->vm_end) &&
		(remainder >= pages_per_huge_page(h))) {
		vaddr += huge_page_size(h);
		remainder -= pages_per_huge_page(h);
		i += pages_per_huge_page(h);
		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);
		continue;
		}

		/* vaddr may not be aligned to PAGE_SIZE */
		refs = min3(pages_per_huge_page(h) - pfn_offset, remainder,
		(vma->vm_end - ALIGN_DOWN(vaddr, PAGE_SIZE)) >> PAGE_SHIFT);

		if (pages)
		record_subpages(nth_page(page, pfn_offset),
		vma, refs,
		likely(pages) ? pages + i : NULL);

		if (pages) {
		/*
		* try_grab_folio() should always succeed here,
		* because: a) we hold the ptl lock, and b) we've just
		* checked that the huge page is present in the page
		* tables. If the huge page is present, then the tail
		* pages must also be present. The ptl prevents the
		* head page and tail pages from being rearranged in
		* any way. As this is hugetlb, the pages will never
		* be p2pdma or not longterm pinable. So this page
		* must be available at this point, unless the page
		* refcount overflowed:
		*/
		if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
		flags))) {
		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);
		remainder = 0;
		err = -ENOMEM;
		break;
		}
		}

		vaddr += (refs << PAGE_SHIFT);
		remainder -= refs;
		i += refs;

		spin_unlock(ptl);
		hugetlb_vma_unlock_read(vma);
		}
		*nr_pages = remainder;
		/*
		* setting position is actually required only if remainder is
		* not zero but it's faster not to add a "if (remainder)"
		* branch.
		*/
		*position = vaddr;

		return i ? i : err;
		}

		long hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end,
		pgprot_t newprot, unsigned long cp_flags)