Commit 378397cc authored by Mike Kravetz's avatar Mike Kravetz Committed by Andrew Morton
Browse files

hugetlb: create hugetlb_unmap_file_folio to unmap single file folio

Create the new routine hugetlb_unmap_file_folio that will unmap a single
file folio.  This is refactored code from hugetlb_vmdelete_list.  It is
modified to do locking within the routine itself and check whether the
page is mapped within a specific vma before unmapping.

This refactoring will be put to use and expanded upon in a subsequent
patch adding vma specific locking.

Link: https://lkml.kernel.org/r/20220914221810.95771-8-mike.kravetz@oracle.com


Signed-off-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: default avatarMiaohe Lin <linmiaohe@huawei.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: James Houghton <jthoughton@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Prakash Sangappa <prakash.sangappa@oracle.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 8d9bfb26
Loading
Loading
Loading
Loading
+94 −29
Original line number Diff line number Diff line
@@ -371,6 +371,94 @@ static void hugetlb_delete_from_page_cache(struct page *page)
	delete_from_page_cache(page);
}

/*
 * Called with i_mmap_rwsem held for inode based vma maps.  This makes
 * sure vma (and vm_mm) will not go away.  We also hold the hugetlb fault
 * mutex for the page in the mapping.  So, we can not race with page being
 * faulted into the vma.
 */
static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
				unsigned long addr, struct page *page)
{
	pte_t *ptep, pte;

	ptep = huge_pte_offset(vma->vm_mm, addr,
			huge_page_size(hstate_vma(vma)));

	if (!ptep)
		return false;

	pte = huge_ptep_get(ptep);
	if (huge_pte_none(pte) || !pte_present(pte))
		return false;

	if (pte_page(pte) == page)
		return true;

	return false;
}

/*
 * Can vma_offset_start/vma_offset_end overflow on 32-bit arches?
 * No, because the interval tree returns us only those vmas
 * which overlap the truncated area starting at pgoff,
 * and no vma on a 32-bit arch can span beyond the 4GB.
 */
static unsigned long vma_offset_start(struct vm_area_struct *vma, pgoff_t start)
{
	if (vma->vm_pgoff < start)
		return (start - vma->vm_pgoff) << PAGE_SHIFT;
	else
		return 0;
}

static unsigned long vma_offset_end(struct vm_area_struct *vma, pgoff_t end)
{
	unsigned long t_end;

	if (!end)
		return vma->vm_end;

	t_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + vma->vm_start;
	if (t_end > vma->vm_end)
		t_end = vma->vm_end;
	return t_end;
}

/*
 * Called with hugetlb fault mutex held.  Therefore, no more mappings to
 * this folio can be created while executing the routine.
 */
static void hugetlb_unmap_file_folio(struct hstate *h,
					struct address_space *mapping,
					struct folio *folio, pgoff_t index)
{
	struct rb_root_cached *root = &mapping->i_mmap;
	struct page *page = &folio->page;
	struct vm_area_struct *vma;
	unsigned long v_start;
	unsigned long v_end;
	pgoff_t start, end;

	start = index * pages_per_huge_page(h);
	end = (index + 1) * pages_per_huge_page(h);

	i_mmap_lock_write(mapping);

	vma_interval_tree_foreach(vma, root, start, end - 1) {
		v_start = vma_offset_start(vma, start);
		v_end = vma_offset_end(vma, end);

		if (!hugetlb_vma_maps_page(vma, vma->vm_start + v_start, page))
			continue;

		unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
				NULL, ZAP_FLAG_DROP_MARKER);
	}

	i_mmap_unlock_write(mapping);
}

static void
hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
		      zap_flags_t zap_flags)
@@ -383,30 +471,13 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
	 * an inclusive "last".
	 */
	vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
		unsigned long v_offset;
		unsigned long v_start;
		unsigned long v_end;

		/*
		 * Can the expression below overflow on 32-bit arches?
		 * No, because the interval tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond the 4GB.
		 */
		if (vma->vm_pgoff < start)
			v_offset = (start - vma->vm_pgoff) << PAGE_SHIFT;
		else
			v_offset = 0;
		v_start = vma_offset_start(vma, start);
		v_end = vma_offset_end(vma, end);

		if (!end)
			v_end = vma->vm_end;
		else {
			v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT)
							+ vma->vm_start;
			if (v_end > vma->vm_end)
				v_end = vma->vm_end;
		}

		unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
		unmap_hugepage_range(vma, vma->vm_start + v_start, v_end,
				     NULL, zap_flags);
	}
}
@@ -428,14 +499,8 @@ static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
	 * the fault mutex.  The mutex will prevent faults
	 * until we finish removing the folio.
	 */
	if (unlikely(folio_mapped(folio))) {
		i_mmap_lock_write(mapping);
		hugetlb_vmdelete_list(&mapping->i_mmap,
			index * pages_per_huge_page(h),
			(index + 1) * pages_per_huge_page(h),
			ZAP_FLAG_DROP_MARKER);
		i_mmap_unlock_write(mapping);
	}
	if (unlikely(folio_mapped(folio)))
		hugetlb_unmap_file_folio(h, mapping, folio, index);

	folio_lock(folio);
	/*