Commit 2c58c393 authored by Filipe Manana, committed by David Sterba
Browse files

btrfs: remove BUG() after failure to insert delayed dir index item

Instead of calling BUG() when we fail to insert a delayed dir index item
into the delayed node's tree, we can just release all the resources we
have allocated/acquired before and return the error to the caller. This is
fine because all existing call chains either undo anything they have done
before calling btrfs_insert_delayed_dir_index(), or hit a BUG_ON() (when
creating pending snapshots in the transaction commit path).

So remove the BUG() call and do proper error handling.

This relates to a syzbot report linked below, but does not fix it because
it only prevents hitting a BUG(); it does not fix the issue where we somehow
attempt to use the same index number twice for different index items.

Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/


CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 91bfe310
Loading
Loading
Loading
Loading
+47 −27
Original line number Diff line number Diff line
@@ -1426,7 +1426,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
/*
 * Give back the metadata space that was reserved for inserting one dir
 * index item.
 *
 * The size needed for a single item insertion is released from the
 * transaction handle's block reserve and subtracted from
 * trans->bytes_reserved. Nothing is released while the filesystem has
 * BTRFS_FS_LOG_RECOVERING set.
 */
static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *info = trans->fs_info;
	const u64 bytes = btrfs_calc_insert_metadata_size(info, 1);

	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &info->flags)) {
		/*
		 * The new dir index item does not need another leaf to be
		 * touched, so 1 unit of the metadata reserved when the
		 * transaction was started can be handed back. This only
		 * holds for a transaction start; the transaction join case
		 * (log tree replay) is excluded by the check above.
		 */
		trace_btrfs_space_reservation(info, "transaction",
					      trans->transid, bytes, 0);
		btrfs_block_rsv_release(info, trans->block_rsv, bytes, NULL);
		/* The handle must still account for at least what we drop. */
		ASSERT(trans->bytes_reserved >= bytes);
		trans->bytes_reserved -= bytes;
	}
}

/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   const char *name, int name_len,
				   struct btrfs_inode *dir,
@@ -1468,6 +1490,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,

	mutex_lock(&delayed_node->mutex);

	/*
	 * First attempt to insert the delayed item. This is to make the error
	 * handling path simpler in case we fail (-EEXIST). There's no risk of
	 * any other task coming in and running the delayed item before we do
	 * the metadata space reservation below, because we are holding the
	 * delayed node's mutex and that mutex must also be locked before the
	 * node's delayed items can be run.
	 */
	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(trans->fs_info,
"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
			  name_len, name, index, btrfs_root_id(delayed_node->root),
			  delayed_node->inode_id, dir->index_cnt,
			  delayed_node->index_cnt, ret);
		btrfs_release_delayed_item(delayed_item);
		btrfs_release_dir_index_item_space(trans);
		mutex_unlock(&delayed_node->mutex);
		goto release_node;
	}

	if (delayed_node->index_item_leaves == 0 ||
	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
		delayed_node->curr_index_batch_size = data_len;
@@ -1485,37 +1528,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
		 * impossible.
		 */
		if (WARN_ON(ret)) {
			mutex_unlock(&delayed_node->mutex);
			btrfs_release_delayed_item(delayed_item);
			mutex_unlock(&delayed_node->mutex);
			goto release_node;
		}

		delayed_node->index_item_leaves++;
	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

		/*
		 * Adding the new dir index item does not require touching another
		 * leaf, so we can release 1 unit of metadata that was previously
		 * reserved when starting the transaction. This applies only to
		 * the case where we had a transaction start and excludes the
		 * transaction join case (when replaying log trees).
		 */
		trace_btrfs_space_reservation(fs_info, "transaction",
					      trans->transid, bytes, 0);
		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
		ASSERT(trans->bytes_reserved >= bytes);
		trans->bytes_reserved -= bytes;
	}

	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(trans->fs_info,
"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
			  name_len, name, index, btrfs_root_id(delayed_node->root),
			  delayed_node->inode_id, dir->index_cnt,
			  delayed_node->index_cnt, ret);
		BUG();
	} else {
		btrfs_release_dir_index_item_space(trans);
	}
	mutex_unlock(&delayed_node->mutex);