Commit cbd59c48 authored by Matthew Wilcox (Oracle)'s avatar Matthew Wilcox (Oracle) Committed by Linus Torvalds
Browse files

mm/filemap: use head pages in generic_file_buffered_read

Add filemap_get_read_batch() which returns the head pages which represent
a contiguous array of bytes in the file.  It also stops when encountering
a page marked as Readahead or !Uptodate (but does return that page) so it
can be handled appropriately by filemap_get_pages().  That lets us remove
the loop in filemap_get_pages() and check only the last page.

Link: https://lkml.kernel.org/r/20210122160140.223228-5-willy@infradead.org


Signed-off-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent ff993ba1
Loading
Loading
Loading
Loading
+85 −37
Original line number Diff line number Diff line
@@ -2178,6 +2178,51 @@ static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
		return lock_page_killable(page);
}

/*
 * filemap_get_read_batch - Get a batch of pages for read
 *
 * Get a batch of pages which represent a contiguous range of bytes
 * in the file.  No tail pages will be returned.  If @index is in the
 * middle of a THP, the entire THP will be returned.  The last page in
 * the batch may have Readahead set or be not Uptodate so that the
 * caller can take the appropriate action.
 */
static void filemap_get_read_batch(struct address_space *mapping,
		pgoff_t index, pgoff_t max, struct pagevec *pvec)
{
	XA_STATE(xas, &mapping->i_pages, index);
	struct page *head;

	rcu_read_lock();
	for (head = xas_load(&xas); head; head = xas_next(&xas)) {
		if (xas_retry(&xas, head))
			continue;
		if (xas.xa_index > max || xa_is_value(head))
			break;
		if (!page_cache_get_speculative(head))
			goto retry;

		/* Has the page moved or been split? */
		if (unlikely(head != xas_reload(&xas)))
			goto put_page;

		if (!pagevec_add(pvec, head))
			break;
		if (!PageUptodate(head))
			break;
		if (PageReadahead(head))
			break;
		xas.xa_index = head->index + thp_nr_pages(head) - 1;
		xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}
	rcu_read_unlock();
}

static struct page *filemap_read_page(struct kiocb *iocb, struct file *filp,
		struct address_space *mapping, struct page *page)
{
@@ -2331,15 +2376,15 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
	struct address_space *mapping = filp->f_mapping;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
	unsigned int nr = min_t(unsigned long, last_index - index, PAGEVEC_SIZE);
	int i, j, err = 0;
	pgoff_t last_index;
	int err = 0;

	last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
find_page:
	if (fatal_signal_pending(current))
		return -EINTR;

	pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
	filemap_get_read_batch(mapping, index, last_index, pvec);
	if (pvec->nr)
		goto got_pages;

@@ -2348,29 +2393,30 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,

	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);

	pvec->nr = find_get_pages_contig(mapping, index, nr, pvec->pages);
	filemap_get_read_batch(mapping, index, last_index, pvec);
	if (pvec->nr)
		goto got_pages;

	pvec->pages[0] = filemap_create_page(iocb, iter);
	err = PTR_ERR_OR_ZERO(pvec->pages[0]);
	if (!IS_ERR_OR_NULL(pvec->pages[0]))
	if (IS_ERR_OR_NULL(pvec->pages[0]))
		goto err;
	pvec->nr = 1;
	return 0;
got_pages:
	for (i = 0; i < pvec->nr; i++) {
		struct page *page = pvec->pages[i];
		pgoff_t pg_index = index + i;
	{
		struct page *page = pvec->pages[pvec->nr - 1];
		pgoff_t pg_index = page->index;
		loff_t pg_pos = max(iocb->ki_pos,
				    (loff_t) pg_index << PAGE_SHIFT);
		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;

		if (PageReadahead(page)) {
			if (iocb->ki_flags & IOCB_NOIO) {
				for (j = i; j < pvec->nr; j++)
					put_page(pvec->pages[j]);
				pvec->nr = i;
				put_page(page);
				pvec->nr--;
				err = -EAGAIN;
				break;
				goto err;
			}
			page_cache_async_readahead(mapping, ra, filp, page,
					pg_index, last_index - pg_index);
@@ -2378,26 +2424,23 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,

		if (!PageUptodate(page)) {
			if ((iocb->ki_flags & IOCB_NOWAIT) ||
			    ((iocb->ki_flags & IOCB_WAITQ) && i)) {
				for (j = i; j < pvec->nr; j++)
					put_page(pvec->pages[j]);
				pvec->nr = i;
			    ((iocb->ki_flags & IOCB_WAITQ) && pvec->nr > 1)) {
				put_page(page);
				pvec->nr--;
				err = -EAGAIN;
				break;
				goto err;
			}

			page = filemap_update_page(iocb, filp, iter, page,
					pg_pos, pg_count);
			if (IS_ERR_OR_NULL(page)) {
				for (j = i + 1; j < pvec->nr; j++)
					put_page(pvec->pages[j]);
				pvec->nr = i;
				pvec->nr--;
				err = PTR_ERR_OR_ZERO(page);
				break;
			}
		}
	}

err:
	if (likely(pvec->nr))
		return 0;
	if (err)
@@ -2442,6 +2485,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
		return 0;

	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
	pagevec_init(&pvec);

	do {
		cond_resched();
@@ -2469,13 +2513,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
		isize = i_size_read(inode);
		if (unlikely(iocb->ki_pos >= isize))
			goto put_pages;

		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);

		while ((iocb->ki_pos >> PAGE_SHIFT) + pvec.nr >
		       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
			put_page(pvec.pages[--pvec.nr]);

		/*
		 * Once we start copying data, we don't want to be touching any
		 * cachelines that might be contended:
@@ -2489,24 +2528,32 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
		if (iocb->ki_pos >> PAGE_SHIFT !=
		    ra->prev_pos >> PAGE_SHIFT)
			mark_page_accessed(pvec.pages[0]);
		for (i = 1; i < pagevec_count(&pvec); i++)
			mark_page_accessed(pvec.pages[i]);

		for (i = 0; i < pagevec_count(&pvec); i++) {
			unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
			unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
						   PAGE_SIZE - offset);
			unsigned int copied;
			struct page *page = pvec.pages[i];
			size_t page_size = thp_size(page);
			size_t offset = iocb->ki_pos & (page_size - 1);
			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
					     page_size - offset);
			size_t copied;

			if (end_offset < page_offset(page))
				break;
			if (i > 0)
				mark_page_accessed(page);
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (writably_mapped)
				flush_dcache_page(pvec.pages[i]);
			if (writably_mapped) {
				int j;

				for (j = 0; j < thp_nr_pages(page); j++)
					flush_dcache_page(page + j);
			}

			copied = copy_page_to_iter(pvec.pages[i], offset, bytes, iter);
			copied = copy_page_to_iter(page, offset, bytes, iter);

			written += copied;
			iocb->ki_pos += copied;
@@ -2520,6 +2567,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
put_pages:
		for (i = 0; i < pagevec_count(&pvec); i++)
			put_page(pvec.pages[i]);
		pagevec_reinit(&pvec);
	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);

	file_accessed(filp);