Commit d8a1c16d authored by Filipe Manana, committed by Yifan Qiao
Browse files

btrfs: do not start and wait for delalloc on snapshot roots on transaction commit

mainline inclusion
from mainline-v5.11-rc1
commit 88090ad3
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I94K22

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=88090ad36a64af1eb5b78d26b2ccd07eedae80b5



------------------------------------------------------

We no longer need to start writeback for delalloc of roots that are
being snapshotted and wait for it to complete. This was done in commit
609e804d ("Btrfs: fix file corruption after snapshotting due to mix
of buffered/DIO writes") to fix a type of file corruption where files in a
snapshot end up having their i_size updated in a non-ordered way, leaving
implicit file holes, when buffered IO writes that increase a file's size
are followed by direct IO writes that also increase the file's size.

This is not needed anymore because we now have a more generic mechanism
to prevent a non-ordered i_size update since commit 9ddc959e
("btrfs: use the file extent tree infrastructure"), which addresses this
scenario involving snapshots as well.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Yifan Qiao <qiaoyifan4@huawei.com>
parent ebba5515
Loading
Loading
Loading
Loading
+6 −43
Original line number Diff line number Diff line
@@ -2015,10 +2015,8 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
       }
}

static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	/*
	 * We use writeback_inodes_sb here because if we used
	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -2028,50 +2026,15 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
	 * from already being in a transaction and our join_transaction doesn't
	 * have to re-take the fs freeze lock.
	 */
	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
	} else {
		struct btrfs_pending_snapshot *pending;
		struct list_head *head = &trans->transaction->pending_snapshots;

		/*
		 * Flush dellaloc for any root that is going to be snapshotted.
		 * This is done to avoid a corrupted version of files, in the
		 * snapshots, that had both buffered and direct IO writes (even
		 * if they were done sequentially) due to an unordered update of
		 * the inode's size on disk.
		 */
		list_for_each_entry(pending, head, list) {
			int ret;

			ret = btrfs_start_delalloc_snapshot(pending->root);
			if (ret)
				return ret;
		}
	}
	return 0;
}

static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
		btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
	} else {
		struct btrfs_pending_snapshot *pending;
		struct list_head *head = &trans->transaction->pending_snapshots;

		/*
		 * Wait for any dellaloc that we started previously for the roots
		 * that are going to be snapshotted. This is to avoid a corrupted
		 * version of files in the snapshots that had both buffered and
		 * direct IO writes (even if they were done sequentially).
		 */
		list_for_each_entry(pending, head, list)
			btrfs_wait_ordered_extents(pending->root,
						   U64_MAX, 0, U64_MAX);
	}
}

int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2209,7 +2172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)

	extwriter_counter_dec(cur_trans, trans->type);

	ret = btrfs_start_delalloc_flush(trans);
	ret = btrfs_start_delalloc_flush(fs_info);
	if (ret)
		goto cleanup_transaction;

@@ -2225,7 +2188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
	if (ret)
		goto cleanup_transaction;

	btrfs_wait_delalloc_flush(trans);
	btrfs_wait_delalloc_flush(fs_info);

	/*
	 * Wait for all ordered extents started by a fast fsync that joined this