Commit 8a4c9fc6 authored by Yu Kuai's avatar Yu Kuai
Browse files

iomap: add support to track dirty state of sub pages

hulk inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/I8TKTW


CVE: NA

------------------------------------------

commit 9dc55f13 ("iomap: add support for sub-pagesize buffered I/O
without buffer heads") replaced the per-block structure buffer_head with
the per-page structure iomap_page. However, iomap_page can't track the
dirty state of sub pages, which will cause a performance issue since sub
pages will be written back even if they are not dirty.

For example, if block size is 4k and page size is 64k:

dd if=/dev/zero of=testfile bs=4k count=16 oflag=sync

With the buffer_head implementation, the above dd cmd will write back 4k
in each round. However, with the iomap_page implementation, the range of
writeback in each round is from the start of the page to the end offset
we just wrote.

Thus add support to track dirty state in iomap_page.

test environment:
platform:	arm64
pagesize:	64k
blocksize:	4k

test case:
dd if=/dev/zero of=/mnt/testfile bs=1M count=128
fio --ioengine=sync --rw=randwrite --iodepth=64 --name=test \
    --filename=/mnt/testfile --bs=4k --fsync=1

The test result is:
xfs with patch:		WRITE: bw=4609KiB/s (4720kB/s)
xfs without patch:	WRITE: bw=2714KiB/s (2780kB/s)
ext4:			WRITE: bw=3840KiB/s (3932kB/s)

Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
parent 40a1ac08
Loading
Loading
Loading
Loading
+59 −11
Original line number Diff line number Diff line
@@ -121,8 +121,8 @@ iomap_page_create(struct inode *inode, struct page *page)
	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&iop->read_count, 0);
	atomic_set(&iop->write_count, 0);
	spin_lock_init(&iop->uptodate_lock);
	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
	spin_lock_init(&iop->state_lock);
	bitmap_zero(iop->state, IOMAP_STATE_ARRAY_SIZE);

	/*
	 * migrate_page_move_mapping() assumes that pages with private data have
@@ -175,7 +175,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,

		/* move forward for each leading block marked uptodate */
		for (i = first; i <= last; i++) {
			if (!test_bit(i, iop->uptodate))
			if (!test_bit(i, iop->state))
				break;
			*pos += block_size;
			poff += block_size;
@@ -185,7 +185,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,

		/* truncate len if we find any trailing uptodate block(s) */
		for ( ; i <= last; i++) {
			if (test_bit(i, iop->uptodate)) {
			if (test_bit(i, iop->state)) {
				plen -= (last - i + 1) * block_size;
				last = i - 1;
				break;
@@ -209,6 +209,54 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
	*lenp = plen;
}

static void
iomap_set_range_dirty(struct page *page, unsigned int off,
		unsigned int len)
{
	struct inode *inode = page->mapping->host;
	unsigned int first = DIRTY_BITS(off >> inode->i_blkbits);
	unsigned int last = DIRTY_BITS((off + len - 1) >> inode->i_blkbits);
	unsigned long flags;
	struct iomap_page *iop;

	if (PageError(page))
		return;

	if (len)
		iomap_set_page_dirty(page);

	if (!page_has_private(page))
		return;

	iop = to_iomap_page(page);
	spin_lock_irqsave(&iop->state_lock, flags);
	bitmap_set(iop->state, first, last - first + 1);
	spin_unlock_irqrestore(&iop->state_lock, flags);
}

void
iomap_clear_range_dirty(struct page *page, unsigned int off,
		unsigned int len)
{
	struct inode *inode = page->mapping->host;
	unsigned int first = DIRTY_BITS(off >> inode->i_blkbits);
	unsigned int last = DIRTY_BITS((off + len - 1) >> inode->i_blkbits);
	unsigned long flags;
	struct iomap_page *iop;

	if (PageError(page))
		return;

	if (!page_has_private(page))
		return;

	iop = to_iomap_page(page);
	spin_lock_irqsave(&iop->state_lock, flags);
	bitmap_clear(iop->state, first, last - first + 1);
	spin_unlock_irqrestore(&iop->state_lock, flags);
}
EXPORT_SYMBOL_GPL(iomap_clear_range_dirty);

static void
iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
{
@@ -220,17 +268,17 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&iop->uptodate_lock, flags);
	spin_lock_irqsave(&iop->state_lock, flags);
	for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
		if (i >= first && i <= last)
			set_bit(i, iop->uptodate);
		else if (!test_bit(i, iop->uptodate))
			set_bit(i, iop->state);
		else if (!test_bit(i, iop->state))
			uptodate = false;
	}

	if (uptodate)
		SetPageUptodate(page);
	spin_unlock_irqrestore(&iop->uptodate_lock, flags);
	spin_unlock_irqrestore(&iop->state_lock, flags);
}

static void
@@ -544,7 +592,7 @@ iomap_is_partially_uptodate(struct page *page, unsigned long from,

	if (iop) {
		for (i = first; i <= last; i++)
			if (!test_bit(i, iop->uptodate))
			if (!test_bit(i, iop->state))
				return 0;
		return 1;
	}
@@ -760,7 +808,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
	if (unlikely(copied < len && !PageUptodate(page)))
		return 0;
	iomap_set_range_uptodate(page, offset_in_page(pos), len);
	iomap_set_page_dirty(page);
	iomap_set_range_dirty(page, offset_in_page(pos), len);
	return copied;
}

@@ -1096,7 +1144,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
	} else {
		WARN_ON_ONCE(!PageUptodate(page));
		iomap_page_create(inode, page);
		set_page_dirty(page);
		iomap_set_range_dirty(page, offset_in_page(pos), length);
	}

	return length;
+2 −1
Original line number Diff line number Diff line
@@ -751,7 +751,7 @@ xfs_writepage_map(
	for (i = 0, file_offset = page_offset(page);
	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
	     i++, file_offset += len) {
		if (iop && !test_bit(i, iop->uptodate))
		if (iop && !test_bit(DIRTY_BITS(i), iop->state))
			continue;

		error = xfs_map_blocks(wpc, inode, file_offset);
@@ -800,6 +800,7 @@ xfs_writepage_map(
		 */
		set_page_writeback_keepwrite(page);
	} else {
		iomap_clear_range_dirty(page, 0, PAGE_SIZE);
		clear_page_dirty_for_io(page);
		set_page_writeback(page);
	}
+13 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/bitmap.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/blkdev.h>

struct address_space;
struct fiemap_extent_info;
@@ -53,6 +54,9 @@ struct vm_fault;
 */
#define IOMAP_NULL_ADDR -1ULL	/* addr is not valid */

#define DIRTY_BITS(x)	((x) + PAGE_SIZE / SECTOR_SIZE)
#define IOMAP_STATE_ARRAY_SIZE	(PAGE_SIZE * 2 / SECTOR_SIZE)

struct iomap {
	u64			addr; /* disk offset of mapping, bytes */
	loff_t			offset;	/* file offset of mapping, bytes */
@@ -114,8 +118,12 @@ struct iomap_ops {
struct iomap_page {
	atomic_t		read_count;
	atomic_t		write_count;
	spinlock_t              uptodate_lock;
	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
	spinlock_t              state_lock;
	/*
	 * The first half bits are used to track sub-page uptodate status,
	 * the second half bits are for dirty status.
	 */
	DECLARE_BITMAP(state, IOMAP_STATE_ARRAY_SIZE);
};

static inline struct iomap_page *to_iomap_page(struct page *page)
@@ -136,6 +144,9 @@ int iomap_is_partially_uptodate(struct page *page, unsigned long from,
int iomap_releasepage(struct page *page, gfp_t gfp_mask);
void iomap_invalidatepage(struct page *page, unsigned int offset,
		unsigned int len);
void iomap_clear_range_dirty(struct page *page, unsigned int off,
		unsigned int len);

#ifdef CONFIG_MIGRATION
int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode);