Commit 76b10d25 authored by Matthew Wilcox (Oracle), committed by Nanyong Sun
Browse files

khugepaged: use a folio throughout collapse_file()

mainline inclusion
from mainline-v6.10-rc1
commit 8d1e24c0b82d9730d05ee85eb7f4195df8cdf6a6
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8d1e24c0b82d9730d05ee85eb7f4195df8cdf6a6

--------------------------------

Pull folios from the page cache instead of pages.  Half of this work had
been done already, but we were still operating on pages for a large chunk
of this function.  There is no attempt in this patch to handle large
folios that are smaller than a THP; that will have to wait for a future
patch.

[willy@infradead.org: the unlikely() is embedded in IS_ERR()]
  Link: https://lkml.kernel.org/r/ZhIWX8K0E2tSyMSr@casper.infradead.org
Link: https://lkml.kernel.org/r/20240403171838.1445826-7-willy@infradead.org


Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflicts:
            mm/khugepaged.c
[return val conflict for copy_mc_highpage, which is introduced
by commit 681be068 ("mm/hwpoison: return -EFAULT when copy fail
in copy_mc_[user]_highpage()")]
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
parent 03b6efb8
Loading
Loading
Loading
Loading
+54 −59
Original line number Diff line number Diff line
@@ -1818,9 +1818,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
			 struct collapse_control *cc)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;
	struct page *tmp, *dst;
	struct folio *folio, *new_folio;
	struct page *dst;
	struct folio *folio, *tmp, *new_folio;
	pgoff_t index = 0, end = start + HPAGE_PMD_NR;
	LIST_HEAD(pagelist);
	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
@@ -1858,11 +1857,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,

	for (index = start; index < end; index++) {
		xas_set(&xas, index);
		page = xas_load(&xas);
		folio = xas_load(&xas);

		VM_BUG_ON(index != xas.xa_index);
		if (is_shmem) {
			if (!page) {
			if (!folio) {
				/*
				 * Stop if extent has been truncated or
				 * hole-punched, and is now completely
@@ -1878,7 +1877,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
				continue;
			}

			if (xa_is_value(page) || !PageUptodate(page)) {
			if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
				xas_unlock_irq(&xas);
				/* swap in or instantiate fallocated page */
				if (shmem_get_folio(mapping->host, index, 0,
@@ -1888,28 +1887,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
				}
				/* drain lru cache to help isolate_lru_page() */
				lru_add_drain();
				page = folio_file_page(folio, index);
			} else if (trylock_page(page)) {
				get_page(page);
			} else if (folio_trylock(folio)) {
				folio_get(folio);
				xas_unlock_irq(&xas);
			} else {
				result = SCAN_PAGE_LOCK;
				goto xa_locked;
			}
		} else {	/* !is_shmem */
			if (!page || xa_is_value(page)) {
			if (!folio || xa_is_value(folio)) {
				xas_unlock_irq(&xas);
				page_cache_sync_readahead(mapping, &file->f_ra,
							  file, index,
							  end - index);
				/* drain lru cache to help isolate_lru_page() */
				lru_add_drain();
				page = find_lock_page(mapping, index);
				if (unlikely(page == NULL)) {
				folio = filemap_lock_folio(mapping, index);
				if (IS_ERR(folio)) {
					result = SCAN_FAIL;
					goto xa_unlocked;
				}
			} else if (PageDirty(page)) {
			} else if (folio_test_dirty(folio)) {
				/*
				 * khugepaged only works on read-only fd,
				 * so this page is dirty because it hasn't
@@ -1927,12 +1925,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
				filemap_flush(mapping);
				result = SCAN_FAIL;
				goto xa_unlocked;
			} else if (PageWriteback(page)) {
			} else if (folio_test_writeback(folio)) {
				xas_unlock_irq(&xas);
				result = SCAN_FAIL;
				goto xa_unlocked;
			} else if (trylock_page(page)) {
				get_page(page);
			} else if (folio_trylock(folio)) {
				folio_get(folio);
				xas_unlock_irq(&xas);
			} else {
				result = SCAN_PAGE_LOCK;
@@ -1941,35 +1939,31 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
		}

		/*
		 * The page must be locked, so we can drop the i_pages lock
		 * The folio must be locked, so we can drop the i_pages lock
		 * without racing with truncate.
		 */
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

		/* make sure the page is up to date */
		if (unlikely(!PageUptodate(page))) {
		/* make sure the folio is up to date */
		if (unlikely(!folio_test_uptodate(folio))) {
			result = SCAN_FAIL;
			goto out_unlock;
		}

		/*
		 * If file was truncated then extended, or hole-punched, before
		 * we locked the first page, then a THP might be there already.
		 * we locked the first folio, then a THP might be there already.
		 * This will be discovered on the first iteration.
		 */
		if (PageTransCompound(page)) {
			struct page *head = compound_head(page);

			result = compound_order(head) == HPAGE_PMD_ORDER &&
					head->index == start
		if (folio_test_large(folio)) {
			result = folio_order(folio) == HPAGE_PMD_ORDER &&
					folio->index == start
					/* Maybe PMD-mapped */
					? SCAN_PTE_MAPPED_HUGEPAGE
					: SCAN_PAGE_COMPOUND;
			goto out_unlock;
		}

		folio = page_folio(page);

		if (folio_mapping(folio) != mapping) {
			result = SCAN_TRUNCATED;
			goto out_unlock;
@@ -1979,7 +1973,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
				  folio_test_writeback(folio))) {
			/*
			 * khugepaged only works on read-only fd, so this
			 * page is dirty because it hasn't been flushed
			 * folio is dirty because it hasn't been flushed
			 * since first write.
			 */
			result = SCAN_FAIL;
@@ -2003,33 +1997,34 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,

		xas_lock_irq(&xas);

		VM_BUG_ON_PAGE(page != xa_load(xas.xa, index), page);
		VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio);

		/*
		 * We control three references to the page:
		 * We control three references to the folio:
		 *  - we hold a pin on it;
		 *  - one reference from page cache;
		 *  - one from isolate_lru_page;
		 * If those are the only references, then any new usage of the
		 * page will have to fetch it from the page cache. That requires
		 * locking the page to handle truncate, so any new usage will be
		 * blocked until we unlock page after collapse/during rollback.
		 */
		if (page_count(page) != 3) {
		 *  - one from lru_isolate_folio;
		 * If those are the only references, then any new usage
		 * of the folio will have to fetch it from the page
		 * cache. That requires locking the folio to handle
		 * truncate, so any new usage will be blocked until we
		 * unlock folio after collapse/during rollback.
		 */
		if (folio_ref_count(folio) != 3) {
			result = SCAN_PAGE_COUNT;
			xas_unlock_irq(&xas);
			putback_lru_page(page);
			folio_putback_lru(folio);
			goto out_unlock;
		}

		/*
		 * Accumulate the pages that are being collapsed.
		 * Accumulate the folios that are being collapsed.
		 */
		list_add_tail(&page->lru, &pagelist);
		list_add_tail(&folio->lru, &pagelist);
		continue;
out_unlock:
		unlock_page(page);
		put_page(page);
		folio_unlock(folio);
		folio_put(folio);
		goto xa_unlocked;
	}

@@ -2068,17 +2063,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
	}

	/*
	 * The old pages are locked, so they won't change anymore.
	 * The old folios are locked, so they won't change anymore.
	 */
	index = start;
	dst = folio_page(new_folio, 0);
	list_for_each_entry(page, &pagelist, lru) {
		while (index < page->index) {
	list_for_each_entry(folio, &pagelist, lru) {
		while (index < folio->index) {
			clear_highpage(dst);
			index++;
			dst++;
		}
		if (copy_mc_highpage(dst, page)) {
		if (copy_mc_highpage(dst, folio_page(folio, 0))) {
			result = SCAN_COPY_MC;
			goto rollback;
		}
@@ -2190,15 +2185,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
	folio_unlock(new_folio);

	/*
	 * The collapse has succeeded, so free the old pages.
	 * The collapse has succeeded, so free the old folios.
	 */
	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
		list_del(&page->lru);
		page->mapping = NULL;
		ClearPageActive(page);
		ClearPageUnevictable(page);
		unlock_page(page);
		folio_put_refs(page_folio(page), 3);
	list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
		list_del(&folio->lru);
		folio->mapping = NULL;
		folio_clear_active(folio);
		folio_clear_unevictable(folio);
		folio_unlock(folio);
		folio_put_refs(folio, 3);
	}

	goto out;
@@ -2212,11 +2207,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
		shmem_uncharge(mapping->host, nr_none);
	}

	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
		list_del(&page->lru);
		unlock_page(page);
		putback_lru_page(page);
		put_page(page);
	list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
		list_del(&folio->lru);
		folio_unlock(folio);
		folio_putback_lru(folio);
		folio_put(folio);
	}
	/*
	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM