Unverified Commit b6c2b243 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!14690 Improve the tmpfs large folio read performance

Merge Pull Request from: @wedm23414 
 
Patch series "Improve the tmpfs large folio read performance", v2.

tmpfs already supports PMD-sized large folios, but the tmpfs read
operation still performs copying at PAGE_SIZE granularity, which is not
perfect.  This patchset changes tmpfs to copy data at the folio
granularity, which can improve the read performance.

Use 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, and I can
see about 20% performance improvement, and no regression with bs=4k.  I
also did some functional testing with the xfstests suite, and I did not
find any regressions with the following xfstests config:

  FSTYP=tmpfs
  export TEST_DIR=/mnt/tempfs_mnt
  export TEST_DEV=/mnt/tempfs_mnt
  export SCRATCH_MNT=/mnt/scratchdir
  export SCRATCH_DEV=/mnt/scratchdir


This patch (of 2):

Using iocb->ki_pos to check whether the bytes to be read exceed the file size
and to calculate the number of bytes to read helps simplify the code logic.
Meanwhile, this is also a preparation for improving the tmpfs large folio
read performance in the following patch.


Baolin Wang (3):
  mm: shmem: update iocb->ki_pos directly to simplify tmpfs read logic
  mm: shmem: improve the tmpfs large folio read performance
  mm: shmem: fallback to page size splice if large folio has poisoned
    pages

-- 
2.34.1
 
 
Link: https://gitee.com/openeuler/kernel/pulls/14690
parents c2af3bb9 d2a87be6
Loading
Loading
Loading
Loading
+63 −40
Original line number Diff line number Diff line
@@ -3070,27 +3070,19 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
	unsigned long offset;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	index = *ppos >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct folio *folio = NULL;
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);
		loff_t end_offset, i_size = i_size_read(inode);
		bool fallback_page_copy = false;
		size_t fsize;

		end_index = i_size >> PAGE_SHIFT;
		if (index > end_index)
		if (unlikely(iocb->ki_pos >= i_size))
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset)
				break;
		}

		index = iocb->ki_pos >> PAGE_SHIFT;
		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
		if (error) {
			if (error == -EINVAL)
@@ -3106,24 +3098,29 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
				error = -EIO;
				break;
			}

			if (folio_test_large(folio) &&
			    folio_test_has_hwpoisoned(folio))
				fallback_page_copy = true;
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_rwsem protection against truncate
		 */
		nr = PAGE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset) {
		if (unlikely(iocb->ki_pos >= i_size)) {
			if (folio)
				folio_put(folio);
			break;
		}
		}
		nr -= offset;
		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
		if (folio && likely(!fallback_page_copy))
			fsize = folio_size(folio);
		else
			fsize = PAGE_SIZE;
		offset = iocb->ki_pos & (fsize - 1);
		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);

		if (folio) {
			/*
@@ -3131,10 +3128,15 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
			if (mapping_writably_mapped(mapping)) {
				if (likely(!fallback_page_copy))
					flush_dcache_folio(folio);
				else
					flush_dcache_page(page);
			}

			/*
			 * Mark the page accessed if we read the beginning.
			 * Mark the folio accessed if we read the beginning.
			 */
			if (!offset)
				folio_mark_accessed(folio);
@@ -3142,9 +3144,11 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
			 * Ok, we have the page, and it's up-to-date, so
			 * now we can copy it to user space...
			 */
			if (likely(!fallback_page_copy))
				ret = copy_folio_to_iter(folio, offset, nr, to);
			else
				ret = copy_page_to_iter(page, offset, nr, to);
			folio_put(folio);

		} else if (user_backed_iter(to)) {
			/*
			 * Copy to user tends to be so well optimized, but
@@ -3162,9 +3166,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
		}

		retval += ret;
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;
		iocb->ki_pos += ret;

		if (!iov_iter_count(to))
			break;
@@ -3175,7 +3177,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
	file_accessed(file);
	return retval ? retval : error;
}
@@ -3269,11 +3270,16 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		bool fallback_page_splice = false;
		struct page *page = NULL;
		pgoff_t index;
		size_t size;

		if (*ppos >= i_size_read(inode))
			break;

		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
					SGP_READ);
		index = *ppos >> PAGE_SHIFT;
		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
		if (error) {
			if (error == -EINVAL)
				error = 0;
@@ -3282,12 +3288,15 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
		if (folio) {
			folio_unlock(folio);

			if (folio_test_hwpoison(folio) ||
			    (folio_test_large(folio) &&
			     folio_test_has_hwpoisoned(folio))) {
			page = folio_file_page(folio, index);
			if (PageHWPoison(page)) {
				error = -EIO;
				break;
			}

			if (folio_test_large(folio) &&
			    folio_test_has_hwpoisoned(folio))
				fallback_page_splice = true;
		}

		/*
@@ -3301,7 +3310,17 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		part = min_t(loff_t, isize - *ppos, len);
		/*
		 * Fallback to PAGE_SIZE splice if the large folio has hwpoisoned
		 * pages.
		 */
		size = len;
		if (unlikely(fallback_page_splice)) {
			size_t offset = *ppos & ~PAGE_MASK;

			size = umin(size, PAGE_SIZE - offset);
		}
		part = min_t(loff_t, isize - *ppos, size);

		if (folio) {
			/*
@@ -3309,8 +3328,12 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
			if (mapping_writably_mapped(mapping)) {
				if (likely(!fallback_page_splice))
					flush_dcache_folio(folio);
				else
					flush_dcache_page(page);
			}
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can