Commit 62e2e688 authored by Dave Chinner's avatar Dave Chinner Committed by Long Li
Browse files

xfs: use iomap_valid method to detect stale cached iomaps

mainline inclusion
from mainline-v6.1-rc4
commit 304a68b9
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I76JSK
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=304a68b9c63bbfc1f6e159d68e8892fc54a06067

--------------------------------

Now that iomap supports a mechanism to validate cached iomaps for
buffered write operations, hook it up to the XFS buffered write ops
so that we can avoid data corruptions that result from stale cached
iomaps. See:

https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/

or the ->iomap_valid() introduction commit for exact details of the
corruption vector.

The validity cookie we store in the iomap is based on the type of
iomap we return. It is expected that the iomap->flags we set in
xfs_bmbt_to_iomap() are not perturbed by the iomap core and are
returned to us in the iomap passed via the .iomap_valid() callback.
This ensures that the validity cookie is always checking the correct
inode fork sequence numbers to detect potential changes that affect
the extent cached by the iomap.

Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>

conflicts:
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_pnfs.c

Signed-off-by: default avatarYe Bin <yebini10@huawei.com>
Signed-off-by: default avatarLong Li <leo.lilong@huawei.com>
parent 56ee791d
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -4539,7 +4539,8 @@ xfs_bmapi_convert_delalloc(
	 * the extent.  Just return the real extent at this offset.
	 */
	if (!isnullstartblock(bma.got.br_startblock)) {
		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags,
				  xfs_iomap_inode_sequence(ip, flags));
		*seq = READ_ONCE(ifp->if_seq);
		goto out_trans_cancel;
	}
@@ -4586,7 +4587,8 @@ xfs_bmapi_convert_delalloc(
	XFS_STATS_INC(mp, xs_xstrat_quick);

	ASSERT(!isnullstartblock(bma.got.br_startblock));
	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags,
			  xfs_iomap_inode_sequence(ip, flags));
	*seq = READ_ONCE(ifp->if_seq);

	if (whichfork == XFS_COW_FORK)
+1 −1
Original line number Diff line number Diff line
@@ -436,7 +436,7 @@ xfs_map_blocks(
	    isnullstartblock(imap.br_startblock))
		goto allocate_blocks;

	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, XFS_WPC(wpc)->data_seq);
	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
	return 0;
allocate_blocks:
+76 −20
Original line number Diff line number Diff line
@@ -49,12 +49,44 @@ xfs_alert_fsblock_zero(
	return -EFSCORRUPTED;
}

/*
 * Compute the validity cookie for an iomap about to be handed to the
 * iomap core.  The cookie captures the sequence number(s) of the inode
 * fork(s) backing the mapping, selected by @iomap_flags:
 *
 *  - IOMAP_F_XATTR: the attr fork sequence alone;
 *  - IOMAP_F_SHARED (when a COW fork exists): COW fork sequence in the
 *    high 32 bits combined with the data fork sequence in the low 32 bits;
 *  - otherwise: the data fork sequence alone.
 *
 * A later mismatch between this cookie and a freshly computed one means
 * the fork extent list changed and the cached iomap is stale.
 */
u64
xfs_iomap_inode_sequence(
	struct xfs_inode	*ip,
	u16			iomap_flags)
{
	u64			cookie = 0;

	if (iomap_flags & IOMAP_F_XATTR)
		return READ_ONCE(ip->i_af.if_seq);
	if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
		cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
	return cookie | READ_ONCE(ip->i_df.if_seq);
}

/*
 * Check that a cached iomap is still valid.
 *
 * The iomap is valid iff the validity cookie stored in it by
 * xfs_bmbt_to_iomap() still matches a cookie freshly computed from the
 * current inode fork sequence numbers (for the same iomap->flags).  Any
 * change to the relevant fork's extent list since the iomap was created
 * makes the cookies differ and the mapping stale.
 */
static bool
xfs_iomap_valid(
	struct inode		*inode,
	const struct iomap	*iomap)
{
	return iomap->validity_cookie ==
			xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags);
}

/* Installed on every iomap built by xfs_bmbt_to_iomap() so the iomap
 * core can revalidate cached mappings before use. */
const struct iomap_page_ops xfs_iomap_page_ops = {
	.iomap_valid		= xfs_iomap_valid,
};

int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	u16			flags)
	u16			flags,
	u64			sequence_cookie)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
@@ -85,6 +117,9 @@ xfs_bmbt_to_iomap(
	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;

	iomap->validity_cookie = sequence_cookie;
	iomap->page_ops = &xfs_iomap_page_ops;
	return 0;
}

@@ -188,7 +223,8 @@ xfs_iomap_write_direct(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		count_fsb,
	struct xfs_bmbt_irec	*imap)
	struct xfs_bmbt_irec	*imap,
	u64			*seq)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
@@ -276,6 +312,7 @@ xfs_iomap_write_direct(
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	*seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

@@ -731,6 +768,7 @@ xfs_direct_write_iomap_begin(
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned		lockmode;
	u64			seq;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

@@ -785,9 +823,10 @@ xfs_direct_write_iomap_begin(
		goto out_unlock;
	}

	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags, seq);

allocate_blocks:
	error = -EAGAIN;
@@ -813,23 +852,25 @@ xfs_direct_write_iomap_begin(
	xfs_iunlock(ip, lockmode);

	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
			&imap);
			&imap, &seq);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW, seq);

out_found_cow:
	xfs_iunlock(ip, lockmode);
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		seq = xfs_iomap_inode_sequence(ip, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0, seq);
		if (error)
			return error;
			goto out_unlock;
	}
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, lockmode);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED, seq);

out_unlock:
	if (lockmode)
@@ -860,6 +901,7 @@ xfs_buffered_write_iomap_begin(
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;
@@ -1039,25 +1081,30 @@ xfs_buffered_write_iomap_begin(
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW, seq);

found_imap:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0, seq);

found_cow:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	seq = xfs_iomap_inode_sequence(ip, 0);
	if (imap.br_startoff <= offset_fsb) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0, seq);
		if (error)
			return error;
		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
			goto out_unlock;
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED, seq);
	}

	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0, seq);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1157,6 +1204,7 @@ xfs_read_iomap_begin(
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned		lockmode;
	u64			seq;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

@@ -1170,12 +1218,14 @@ xfs_read_iomap_begin(
			       &nimaps, 0);
	if (!error && (flags & IOMAP_REPORT))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
	return xfs_bmbt_to_iomap(ip, iomap, &imap,
				 shared ? IOMAP_F_SHARED : 0, seq);
}

const struct iomap_ops xfs_read_iomap_ops = {
@@ -1200,6 +1250,7 @@ xfs_seek_iomap_begin(
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;
@@ -1236,7 +1287,8 @@ xfs_seek_iomap_begin(
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED, seq);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
@@ -1257,8 +1309,9 @@ xfs_seek_iomap_begin(
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, seq);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
@@ -1284,6 +1337,7 @@ xfs_xattr_iomap_begin(
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;
	int			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;
@@ -1300,12 +1354,14 @@ xfs_xattr_iomap_begin(
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:

	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0, seq);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
+4 −2
Original line number Diff line number Diff line
@@ -12,13 +12,15 @@ struct xfs_inode;
struct xfs_bmbt_irec;

int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
		xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
		xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap,
		u64 *sequence);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
		xfs_fileoff_t end_fsb);

u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags);
int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
		struct xfs_bmbt_irec *, u16);
		struct xfs_bmbt_irec *, u16, u64 sequence_cookie);

static inline xfs_filblks_t
xfs_aligned_fsb_count(
+4 −2
Original line number Diff line number Diff line
@@ -91,6 +91,7 @@ xfs_fs_map_blocks(
	int			nimaps = 1;
	uint			lock_flags;
	int			error = 0;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;
@@ -142,6 +143,7 @@ xfs_fs_map_blocks(
	lock_flags = xfs_ilock_data_map_shared(ip);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				&imap, &nimaps, bmapi_flags);
	seq = xfs_iomap_inode_sequence(ip, 0);

	ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);

@@ -155,7 +157,7 @@ xfs_fs_map_blocks(
		xfs_iunlock(ip, lock_flags);

		error = xfs_iomap_write_direct(ip, offset_fsb,
				end_fsb - offset_fsb, &imap);
				end_fsb - offset_fsb, &imap, &seq);
		if (error)
			goto out_unlock;

@@ -175,7 +177,7 @@ xfs_fs_map_blocks(
	}
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, seq);
	*device_generation = mp->m_generation;
	return error;
out_unlock: