Commit 6d37fb9c authored by John Garry, committed by Long Li
Browse files

fs: iomap: Sub-extent zeroing

maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9VTE3
CVE: NA

Reference: https://lore.kernel.org/all/20240326133813.3224593-1-john.g.garry@oracle.com/



--------------------------------

For FS_XFLAG_FORCEALIGN support, we want to treat any sub-extent IO like
sub-fsblock DIO, in that we will zero the sub-extent when the mapping is
unwritten.

This will be important for atomic writes support, in that atomically
writing over a partially written extent would mean that we would need to
do the unwritten extent conversion write separately, and the write could
no longer be atomic.

It is the task of the FS to set iomap.extent_shift per iter to indicate
that sub-extent zeroing is required.

Maybe a macro like i_blocksize() should be introduced for extent sizes,
instead of using extent_shift.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Long Li <leo.lilong@huawei.com>
parent bb5092a7
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -210,15 +210,22 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
	struct page *page = ZERO_PAGE(0);
	int flags = REQ_SYNC | REQ_IDLE;
	struct bio *bio;
	unsigned size;
	unsigned nr_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;

	bio = bio_alloc(GFP_KERNEL, 1);
	bio = bio_alloc(GFP_KERNEL, nr_pages);
	bio_set_dev(bio, iomap->bdev);
	bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
	bio->bi_private = dio;
	bio->bi_end_io = iomap_dio_bio_end_io;

	while (len > 0) {
		size = len > PAGE_SIZE ? PAGE_SIZE : len;
		get_page(page);
	__bio_add_page(bio, page, len, 0);
		__bio_add_page(bio, page, size, 0);
		len -= size;
		pos += size;
	}
	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
	iomap_dio_submit_bio(dio, iomap, bio, pos);
}
@@ -228,7 +235,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
		struct iomap_dio *dio, struct iomap *iomap)
{
	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
	unsigned int fs_block_size = i_blocksize(inode), pad;
	unsigned int zeroing_size, pad;
	unsigned int align = iov_iter_alignment(dio->submit.iter);
	struct bio *bio;
	bool need_zeroout = false;
@@ -237,6 +244,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
	size_t copied = 0;
	size_t orig_count;

	zeroing_size = i_blocksize(inode) << iomap->extent_shift;

	if ((pos | length | align) & ((1 << blkbits) - 1))
		return -EINVAL;

@@ -280,7 +289,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,

	if (need_zeroout) {
		/* zero out from the start of the block to the write offset */
		pad = pos & (fs_block_size - 1);
		pad = pos & (zeroing_size - 1);
		if (pad)
			iomap_dio_zero(dio, iomap, pos - pad, pad);
	}
@@ -345,9 +354,9 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
	if (need_zeroout ||
	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
		/* zero out from the end of the write to the end of the block */
		pad = pos & (fs_block_size - 1);
		pad = pos & (zeroing_size - 1);
		if (pad)
			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
			iomap_dio_zero(dio, iomap, pos, zeroing_size - pad);
	}
out:
	/* Undo iter limitation to current extent */
+1 −0
Original line number Diff line number Diff line
@@ -93,6 +93,7 @@ struct iomap {
	u64			length;	/* length of mapping, bytes */
	u16			type;	/* type of mapping */
	u16			flags;	/* flags for mapping */
	unsigned int		extent_shift;
	struct block_device	*bdev;	/* block device for I/O */
	struct dax_device	*dax_dev; /* dax_dev for dax operations */
	void			*inline_data;