Commit 0e499ed3 authored by Matthew Wilcox (Oracle)

filemap: Return only folios from find_get_entries()

The callers have all been converted to work on folios, so convert
find_get_entries() to return a batch of folios instead of pages.
We also now return multiple large folios in a single call.
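
A sketch of the calling convention this produces (illustrative, not part
of the patch; mapping/index/end stand in for the caller's state):

	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	unsigned int i;

	folio_batch_init(&fbatch);
	while (find_get_entries(mapping, index, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			index = indices[i];
			/* value entries are shadow/swap entries, not folios */
			if (xa_is_value(folio))
				continue;
			/* ... work on the whole folio, then skip its extent */
			index = folio_index(folio) + folio_nr_pages(folio) - 1;
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		index++;
	}

This mirrors the converted callers below: one reference is held per
returned folio, and value entries are filtered out before release.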

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 25d6a23e
include/linux/pagemap.h +0 −2
@@ -592,8 +592,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
 	return head + (index & (thp_nr_pages(head) - 1));
 }
 
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-		pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 			pgoff_t end, unsigned int nr_pages,
 			struct page **pages);
mm/filemap.c +11 −32
@@ -2015,57 +2015,36 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
  * @mapping:	The address_space to search
  * @start:	The starting page cache index
  * @end:	The final page index (inclusive).
- * @pvec:	Where the resulting entries are placed.
+ * @fbatch:	Where the resulting entries are placed.
  * @indices:	The cache indices corresponding to the entries in @entries
  *
  * find_get_entries() will search for and return a batch of entries in
- * the mapping.  The entries are placed in @pvec.  find_get_entries()
- * takes a reference on any actual pages it returns.
+ * the mapping.  The entries are placed in @fbatch.  find_get_entries()
+ * takes a reference on any actual folios it returns.
  *
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes.  There may be holes in the indices due to
- * not-present pages.
+ * The entries have ascending indexes.  The indices may not be consecutive
+ * due to not-present entries or large folios.
  *
- * Any shadow entries of evicted pages, or swap entries from
+ * Any shadow entries of evicted folios, or swap entries from
  * shmem/tmpfs, are included in the returned array.
  *
- * If it finds a Transparent Huge Page, head or tail, find_get_entries()
- * stops at that page: the caller is likely to have a better way to handle
- * the compound page as a whole, and then skip its extent, than repeatedly
- * calling find_get_entries() to return all its tails.
- *
- * Return: the number of pages and shadow entries which were found.
+ * Return: The number of entries which were found.
  */
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-		pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
 {
 	XA_STATE(xas, &mapping->i_pages, start);
 	struct folio *folio;
-	unsigned int ret = 0;
-	unsigned nr_entries = PAGEVEC_SIZE;
 
 	rcu_read_lock();
 	while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
-		struct page *page = &folio->page;
-		/*
-		 * Terminate early on finding a THP, to allow the caller to
-		 * handle it all at once; but continue if this is hugetlbfs.
-		 */
-		if (!xa_is_value(folio) && folio_test_large(folio) &&
-				!folio_test_hugetlb(folio)) {
-			page = folio_file_page(folio, xas.xa_index);
-			nr_entries = ret + 1;
-		}
-
-		indices[ret] = xas.xa_index;
-		pvec->pages[ret] = page;
-		if (++ret == nr_entries)
+		indices[fbatch->nr] = xas.xa_index;
+		if (!folio_batch_add(fbatch, folio))
 			break;
 	}
 	rcu_read_unlock();
 
-	pvec->nr = ret;
-	return ret;
+	return folio_batch_count(fbatch);
 }
 
 /**
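
For reference, the folio_batch_add() call that now terminates the loop
stores the folio and returns the number of slots still free, so the
search stops exactly when the batch fills. Its definition is roughly
(recalled from include/linux/pagevec.h of this era; an assumption, not
part of this patch):

	static inline unsigned folio_batch_add(struct folio_batch *fbatch,
			struct folio *folio)
	{
		fbatch->folios[fbatch->nr++] = folio;
		return folio_batch_space(fbatch);	/* PAGEVEC_SIZE - nr */
	}

Since large folios no longer cut the batch short, a full batch of
PAGEVEC_SIZE entries can now describe far more than PAGEVEC_SIZE pages.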
mm/internal.h +4 −0
@@ -12,6 +12,8 @@
 #include <linux/pagemap.h>
 #include <linux/tracepoint-defs.h>
 
+struct folio_batch;
+
 /*
  * The set of flags that only affect watermark checking and reclaim
  * behaviour. This is used by the MM to obey the caller constraints
@@ -92,6 +94,8 @@ static inline void force_page_cache_readahead(struct address_space *mapping,

 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
 		pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
+unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 void filemap_free_folio(struct address_space *mapping, struct folio *folio);
 int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
 
mm/shmem.c +20 −16
@@ -920,6 +920,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	unsigned int partial_start = lstart & (PAGE_SIZE - 1);
 	unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
 	struct pagevec pvec;
+	struct folio_batch fbatch;
 	pgoff_t indices[PAGEVEC_SIZE];
 	long nr_swaps_freed = 0;
 	pgoff_t index;
@@ -987,11 +988,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	if (start >= end)
 		return;
 
+	folio_batch_init(&fbatch);
 	index = start;
 	while (index < end) {
 		cond_resched();
 
-		if (!find_get_entries(mapping, index, end - 1, &pvec,
+		if (!find_get_entries(mapping, index, end - 1, &fbatch,
 				indices)) {
 			/* If all gone or hole-punch or unfalloc, we're done */
 			if (index == start || end != -1)
@@ -1000,14 +1002,14 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			index = start;
 			continue;
 		}
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+			struct folio *folio = fbatch.folios[i];
 
 			index = indices[i];
-			if (xa_is_value(page)) {
+			if (xa_is_value(folio)) {
 				if (unfalloc)
 					continue;
-				if (shmem_free_swap(mapping, index, page)) {
+				if (shmem_free_swap(mapping, index, folio)) {
 					/* Swap was replaced by page: retry */
 					index--;
 					break;
@@ -1016,33 +1018,35 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				continue;
 			}
 
-			lock_page(page);
+			folio_lock(folio);
 
-			if (!unfalloc || !PageUptodate(page)) {
-				if (page_mapping(page) != mapping) {
+			if (!unfalloc || !folio_test_uptodate(folio)) {
+				struct page *page = folio_file_page(folio,
+									index);
+				if (folio_mapping(folio) != mapping) {
 					/* Page was replaced by swap: retry */
-					unlock_page(page);
+					folio_unlock(folio);
 					index--;
 					break;
 				}
-				VM_BUG_ON_PAGE(PageWriteback(page), page);
+				VM_BUG_ON_FOLIO(folio_test_writeback(folio),
+						folio);
 				if (shmem_punch_compound(page, start, end))
-					truncate_inode_folio(mapping,
-							     page_folio(page));
+					truncate_inode_folio(mapping, folio);
 				else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 					/* Wipe the page and don't get stuck */
 					clear_highpage(page);
 					flush_dcache_page(page);
-					set_page_dirty(page);
+					folio_mark_dirty(folio);
 					if (index <
 					    round_up(start, HPAGE_PMD_NR))
 						start = index + 1;
 				}
 			}
-			unlock_page(page);
+			folio_unlock(folio);
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
+		folio_batch_remove_exceptionals(&fbatch);
+		folio_batch_release(&fbatch);
 		index++;
 	}
 
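Note how this hunk splits per-page from per-folio work: clear_highpage()
and flush_dcache_page() operate on a single page, so the exact subpage is
still resolved with folio_file_page(), while locking, dirtying and
truncation move to folio APIs. Condensed (an illustrative rearrangement
of the hunk above, not compilable on its own):

	folio_lock(folio);
	if (!unfalloc || !folio_test_uptodate(folio)) {
		/* per-page helpers still need the precise subpage */
		struct page *page = folio_file_page(folio, index);

		clear_highpage(page);
		flush_dcache_page(page);
		/* dirtiness is tracked once per folio */
		folio_mark_dirty(folio);
	}
	folio_unlock(folio);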
mm/truncate.c +24 −19
@@ -108,6 +108,13 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 	pvec->nr = j;
 }
 
+static void truncate_folio_batch_exceptionals(struct address_space *mapping,
+				struct folio_batch *fbatch, pgoff_t *indices)
+{
+	truncate_exceptional_pvec_entries(mapping, (struct pagevec *)fbatch,
+						indices);
+}
+
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
  * entries for invalidate_inode_pages().
@@ -297,6 +304,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	unsigned int	partial_start;	/* inclusive */
 	unsigned int	partial_end;	/* exclusive */
 	struct pagevec	pvec;
+	struct folio_batch fbatch;
 	pgoff_t		indices[PAGEVEC_SIZE];
 	pgoff_t		index;
 	int		i;
@@ -379,10 +387,11 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	if (start >= end)
 		goto out;
 
+	folio_batch_init(&fbatch);
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!find_get_entries(mapping, index, end - 1, &pvec,
+		if (!find_get_entries(mapping, index, end - 1, &fbatch,
 				indices)) {
 			/* If all gone from start onwards, we're done */
 			if (index == start)
@@ -392,16 +401,14 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			continue;
 		}
 
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			struct folio *folio;
+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+			struct folio *folio = fbatch.folios[i];
 
 			/* We rely upon deletion not changing page->index */
 			index = indices[i];
 
-			if (xa_is_value(page))
+			if (xa_is_value(folio))
 				continue;
-			folio = page_folio(page);
 
 			folio_lock(folio);
 			VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
@@ -410,8 +417,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			folio_unlock(folio);
 			index = folio_index(folio) + folio_nr_pages(folio) - 1;
 		}
-		truncate_exceptional_pvec_entries(mapping, &pvec, indices);
-		pagevec_release(&pvec);
+		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
+		folio_batch_release(&fbatch);
 		index++;
 	}
 
@@ -625,7 +632,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
 	pgoff_t indices[PAGEVEC_SIZE];
-	struct pagevec pvec;
+	struct folio_batch fbatch;
 	pgoff_t index;
 	int i;
 	int ret = 0;
@@ -635,23 +642,21 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	if (mapping_empty(mapping))
 		goto out;
 
-	pagevec_init(&pvec);
+	folio_batch_init(&fbatch);
 	index = start;
-	while (find_get_entries(mapping, index, end, &pvec, indices)) {
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			struct folio *folio;
+	while (find_get_entries(mapping, index, end, &fbatch, indices)) {
+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+			struct folio *folio = fbatch.folios[i];
 
 			/* We rely upon deletion not changing folio->index */
 			index = indices[i];
 
-			if (xa_is_value(page)) {
+			if (xa_is_value(folio)) {
 				if (!invalidate_exceptional_entry2(mapping,
-								   index, page))
+						index, folio))
 					ret = -EBUSY;
 				continue;
 			}
-			folio = page_folio(page);
 
 			if (!did_range_unmap && folio_mapped(folio)) {
 				/*
@@ -684,8 +689,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				ret = ret2;
 			folio_unlock(folio);
 		}
-		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
+		folio_batch_remove_exceptionals(&fbatch);
+		folio_batch_release(&fbatch);
 		cond_resched();
 		index++;
 	}
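
The (struct pagevec *) cast inside truncate_folio_batch_exceptionals()
is a transitional shim: it relies on struct folio_batch being
deliberately laid out like struct pagevec, so the old helper can run
unchanged until it is converted. Roughly (definitions recalled from
include/linux/pagevec.h of this period; an assumption, not part of the
patch):

	/* Both are: a count, a drain flag, PAGEVEC_SIZE pointers. */
	struct pagevec {
		unsigned char nr;
		bool percpu_pvec_drained;
		struct page *pages[PAGEVEC_SIZE];
	};

	struct folio_batch {
		unsigned char nr;
		bool percpu_pvec_drained;
		struct folio *folios[PAGEVEC_SIZE];
	};

A struct folio * and a struct page * have the same representation, so
reinterpreting the batch is safe here, if formally type-punned.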