Commit d38c3fa6 authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - a tiny race window when two transactions abort at the same time can
   accidentally lead to a commit

 - regression fix, possible deadlock during fiemap

 - fix for an old bug where incremental send can fail on a file that has
   been deduplicated in a special way

* tag 'for-5.3-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix deadlock between fiemap and transaction commits
  Btrfs: fix race leading to fs corruption after transaction abort
  Btrfs: fix incremental send failure after deduplication
parents 97b00aff a6d155d2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
	ulist_init(roots);
	ulist_init(tmp);

	trans = btrfs_attach_transaction(root);
	trans = btrfs_join_transaction_nostart(root);
	if (IS_ERR(trans)) {
		if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
			ret = PTR_ERR(trans);
+15 −62
Original line number Diff line number Diff line
@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
{
	int ret = 0;

	if (sctx->cur_ino != sctx->cmp_key->objectid) {

		if (result == BTRFS_COMPARE_TREE_CHANGED) {
			struct extent_buffer *leaf_l;
			struct extent_buffer *leaf_r;
			struct btrfs_file_extent_item *ei_l;
			struct btrfs_file_extent_item *ei_r;

			leaf_l = sctx->left_path->nodes[0];
			leaf_r = sctx->right_path->nodes[0];
			ei_l = btrfs_item_ptr(leaf_l,
					      sctx->left_path->slots[0],
					      struct btrfs_file_extent_item);
			ei_r = btrfs_item_ptr(leaf_r,
					      sctx->right_path->slots[0],
					      struct btrfs_file_extent_item);

	/*
			 * We may have found an extent item that has changed
			 * only its disk_bytenr field and the corresponding
			 * inode item was not updated. This case happens due to
			 * very specific timings during relocation when a leaf
			 * that contains file extent items is COWed while
			 * relocation is ongoing and its in the stage where it
			 * updates data pointers. So when this happens we can
			 * safely ignore it since we know it's the same extent,
			 * but just at different logical and physical locations
			 * (when an extent is fully replaced with a new one, we
			 * know the generation number must have changed too,
			 * since snapshot creation implies committing the current
			 * transaction, and the inode item must have been updated
			 * as well).
			 * This replacement of the disk_bytenr happens at
			 * relocation.c:replace_file_extents() through
			 * relocation.c:btrfs_reloc_cow_block().
			 */
			if (btrfs_file_extent_generation(leaf_l, ei_l) ==
			    btrfs_file_extent_generation(leaf_r, ei_r) &&
			    btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
			    btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
			    btrfs_file_extent_compression(leaf_l, ei_l) ==
			    btrfs_file_extent_compression(leaf_r, ei_r) &&
			    btrfs_file_extent_encryption(leaf_l, ei_l) ==
			    btrfs_file_extent_encryption(leaf_r, ei_r) &&
			    btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
			    btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
			    btrfs_file_extent_type(leaf_l, ei_l) ==
			    btrfs_file_extent_type(leaf_r, ei_r) &&
			    btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
			    btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
			    btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
			    btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
			    btrfs_file_extent_offset(leaf_l, ei_l) ==
			    btrfs_file_extent_offset(leaf_r, ei_r) &&
			    btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
			    btrfs_file_extent_num_bytes(leaf_r, ei_r))
	 * We have found an extent item that changed without the inode item
	 * having changed. This can happen either after relocation (where the
	 * disk_bytenr of an extent item is replaced at
	 * relocation.c:replace_file_extents()) or after deduplication into a
	 * file in both the parent and send snapshots (where an extent item can
	 * get modified or replaced with a new one). Note that deduplication
	 * updates the inode item, but it only changes the iversion (sequence
	 * field in the inode item) of the inode, so if a file is deduplicated
	 * the same number of times in both the parent and send snapshots, its
	 * iversion becomes the same in both snapshots, whence the inode item is
	 * the same on both snapshots.
	 */
	if (sctx->cur_ino != sctx->cmp_key->objectid)
		return 0;
		}

		inconsistent_snapshot_error(sctx, result, "extent");
		return -EIO;
	}

	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
		if (result != BTRFS_COMPARE_TREE_DELETED)
+28 −4
Original line number Diff line number Diff line
@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
	[TRANS_STATE_COMMIT_START]	= (__TRANS_START | __TRANS_ATTACH),
	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN),
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOSTART),
	[TRANS_STATE_UNBLOCKED]		= (__TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
					   __TRANS_JOIN_NOLOCK |
					   __TRANS_JOIN_NOSTART),
	[TRANS_STATE_COMPLETED]		= (__TRANS_START |
					   __TRANS_ATTACH |
					   __TRANS_JOIN |
					   __TRANS_JOIN_NOLOCK),
					   __TRANS_JOIN_NOLOCK |
					   __TRANS_JOIN_NOSTART),
};

void btrfs_put_transaction(struct btrfs_transaction *transaction)
@@ -543,7 +546,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
		ret = join_transaction(fs_info, type);
		if (ret == -EBUSY) {
			wait_current_trans(fs_info);
			if (unlikely(type == TRANS_ATTACH))
			if (unlikely(type == TRANS_ATTACH ||
				     type == TRANS_JOIN_NOSTART))
				ret = -ENOENT;
		}
	} while (ret == -EBUSY);
@@ -659,6 +663,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
				 BTRFS_RESERVE_NO_FLUSH, true);
}

/*
 * Join variant that never creates a transaction: it does not start one when
 * none is running, nor after waiting for the current one to finish.
 */
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
{
	struct btrfs_trans_handle *handle;

	handle = start_transaction(root, 0, TRANS_JOIN_NOSTART,
				   BTRFS_RESERVE_NO_FLUSH, true);
	return handle;
}

/*
 * btrfs_attach_transaction() - catch the running transaction
 *
@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
		}
	} else {
		spin_unlock(&fs_info->trans_lock);
		/*
		 * The previous transaction was aborted and was already removed
		 * from the list of transactions at fs_info->trans_list. So we
		 * abort to prevent writing a new superblock that reflects a
		 * corrupt state (pointing to trees with unwritten nodes/leafs).
		 */
		if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
			ret = -EROFS;
			goto cleanup_transaction;
		}
	}

	extwriter_counter_dec(cur_trans, trans->type);
+3 −0
Original line number Diff line number Diff line
@@ -94,11 +94,13 @@ struct btrfs_transaction {
#define __TRANS_JOIN		(1U << 11)
#define __TRANS_JOIN_NOLOCK	(1U << 12)
#define __TRANS_DUMMY		(1U << 13)
/* Type bit used by btrfs_join_transaction_nostart(). */
#define __TRANS_JOIN_NOSTART	(1U << 14)

/*
 * Transaction types handed to start_transaction(), composed from the
 * single-bit __TRANS_* flags. Of the types defined here, only TRANS_START
 * and TRANS_JOIN additionally carry __TRANS_FREEZABLE.
 */
#define TRANS_START		(__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH		(__TRANS_ATTACH)
#define TRANS_JOIN		(__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK	(__TRANS_JOIN_NOLOCK)
#define TRANS_JOIN_NOSTART	(__TRANS_JOIN_NOSTART)

/*
 * Union of the START and ATTACH type bits.
 * NOTE(review): presumably the mask of "external writer" types consulted by
 * the extwriter counter helpers - confirm against their definitions.
 */
#define TRANS_EXTWRITERS	(__TRANS_START | __TRANS_ATTACH)

@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
					int min_factor);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
/*
 * Like a regular join, but never starts a transaction when none is running
 * or after waiting for the current one to finish.
 */
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
					struct btrfs_root *root);