Commit 3649ebdb authored by Baolin Wang's avatar Baolin Wang Committed by Euler
Browse files

mm: shmem: improve the tmpfs large folio read performance

mainline inclusion
from mainline-v6.12-rc1
commit a284cb8472ec6bb027ebf3b936385601d8a8f414
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IBFSMJ

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a284cb8472ec6bb027ebf3b936385601d8a8f414

--------------------------------

tmpfs already supports PMD-sized large folios, but the tmpfs read
operation still performs copying at PAGE_SIZE granularity, which is
unreasonable.  This patch changes tmpfs to copy data at folio granularity,
which can improve the read performance, as well as switching to use
folio-related functions.

Moreover, if a large folio has a subpage that is hwpoisoned, it will
still fall back to page granularity copying.

Use 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, and I can
see about 20% performance improvement, and no regression with bs=4k.
Before the patch:
READ: bw=10.0GiB/s

After the patch:
READ: bw=12.0GiB/s

Link: https://lkml.kernel.org/r/2129a21a5b9f77d3bb7ddec152c009ce7c5653c4.1729218573.git.baolin.wang@linux.alibaba.com


Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wang Lian <dev01404@linx-info.com>
---
parent b2770276
Loading
Loading
Loading
Loading
+24 −10
Original line number Diff line number Diff line
@@ -3068,13 +3068,13 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
	int error = 0;
	ssize_t retval = 0;

	offset = iocb->ki_pos & ~PAGE_MASK;

	for (;;) {
		struct folio *folio = NULL;
		struct page *page = NULL;
		unsigned long nr, ret;
		loff_t end_offset, i_size = i_size_read(inode);
		bool fallback_page_copy = false;
		size_t fsize;

		if (unlikely(iocb->ki_pos >= i_size))
			break;
@@ -3095,6 +3095,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
				error = -EIO;
				break;
			}

			if (folio_test_large(folio) &&
			    folio_test_has_hwpoisoned(folio))
				fallback_page_copy = true;
		}

		/*
@@ -3108,7 +3112,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
			break;
		}
		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
		nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
		if (folio && likely(!fallback_page_copy))
			fsize = folio_size(folio);
		else
			fsize = PAGE_SIZE;
		offset = iocb->ki_pos & (fsize - 1);
		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);

		if (folio) {
			/*
@@ -3116,10 +3125,15 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
			if (mapping_writably_mapped(mapping)) {
				if (likely(!fallback_page_copy))
					flush_dcache_folio(folio);
				else
					flush_dcache_page(page);
			}

			/*
			 * Mark the page accessed if we read the beginning.
			 * Mark the folio accessed if we read the beginning.
			 */
			if (!offset)
				folio_mark_accessed(folio);
@@ -3127,9 +3141,11 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
			 * Ok, we have the page, and it's up-to-date, so
			 * now we can copy it to user space...
			 */
			if (likely(!fallback_page_copy))
				ret = copy_folio_to_iter(folio, offset, nr, to);
			else
				ret = copy_page_to_iter(page, offset, nr, to);
			folio_put(folio);

		} else if (user_backed_iter(to)) {
			/*
			 * Copy to user tends to be so well optimized, but
@@ -3147,8 +3163,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
		}

		retval += ret;
		offset += ret;
		offset &= ~PAGE_MASK;
		iocb->ki_pos += ret;

		if (!iov_iter_count(to))