Commit 3669558b authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - several fixes for handling directory item (inserting, removing,
   iteration, error handling)

 - fix transaction commit stalls when auto relocation is running and
   blocks other tasks that want to commit

 - fix a build error when DEBUG is enabled

 - fix lockdep warning in inode number lookup ioctl

 - fix race when finishing block group creation

 - remove link to obsolete wiki in several files

* tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org
  btrfs: assert delayed node locked when removing delayed item
  btrfs: remove BUG() after failure to insert delayed dir index item
  btrfs: improve error message after failure to add delayed dir index item
  btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
  btrfs: check for BTRFS_FS_ERROR in pending ordered assert
  btrfs: fix lockdep splat and potential deadlock after failure running delayed items
  btrfs: do not block starts waiting on previous transaction commit
  btrfs: release path before inode lookup during the ino lookup ioctl
  btrfs: fix race between finishing block group creation and its item update
parents 2c758cef 5facccc9
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki

  https://btrfs.readthedocs.io

  https://btrfs.wiki.kernel.org

that maintains information about administration tasks, frequently asked
questions, use cases, mount options, comprehensible changelogs, features,
+0 −1
Original line number Diff line number Diff line
@@ -4378,7 +4378,6 @@ M: David Sterba <dsterba@suse.com>
L:	linux-btrfs@vger.kernel.org
S:	Maintained
W:	https://btrfs.readthedocs.io
W:	https://btrfs.wiki.kernel.org/
Q:	https://patchwork.kernel.org/project/linux-btrfs/list/
C:	irc://irc.libera.chat/btrfs
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
+1 −1
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ config BTRFS_FS
	  continue to be mountable and usable by newer kernels.

	  For more information, please see the web pages at
	  http://btrfs.wiki.kernel.org.
	  https://btrfs.readthedocs.io

	  To compile this file system support as a module, choose M here. The
	  module will be called btrfs.
+10 −2
Original line number Diff line number Diff line
@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_release_path(path);
	/* We didn't update the block group item, need to revert @commit_used. */
	if (ret < 0) {
	/*
	 * We didn't update the block group item, need to revert commit_used
	 * unless the block group item didn't exist yet - this is to prevent a
	 * race with a concurrent insertion of the block group item, with
	 * insert_block_group_item(), that happened just after we attempted to
	 * update. In that case we would reset commit_used to 0 just after the
	 * insertion set it to a value greater than 0 - if the block group later
	 * becomes with 0 used bytes, we would incorrectly skip its update.
	 */
	if (ret < 0 && ret != -ENOENT) {
		spin_lock(&cache->lock);
		cache->commit_used = old_commit_used;
		spin_unlock(&cache->lock);
+71 −33
Original line number Diff line number Diff line
@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)

static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
	struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
	struct rb_root_cached *root;
	struct btrfs_delayed_root *delayed_root;

@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
	if (RB_EMPTY_NODE(&delayed_item->rb_node))
		return;

	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
	/* If it's in a rbtree, then we need to have delayed node locked. */
	lockdep_assert_held(&delayed_node->mutex);

	delayed_root = delayed_node->root->fs_info->delayed_root;

	BUG_ON(!delayed_root);

	if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_item->delayed_node->ins_root;
		root = &delayed_node->ins_root;
	else
		root = &delayed_item->delayed_node->del_root;
		root = &delayed_node->del_root;

	rb_erase_cached(&delayed_item->rb_node, root);
	RB_CLEAR_NODE(&delayed_item->rb_node);
	delayed_item->delayed_node->count--;
	delayed_node->count--;

	finish_one_item(delayed_root);
}
@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
		ret = __btrfs_commit_inode_delayed_items(trans, path,
							 curr_node);
		if (ret) {
			btrfs_release_delayed_node(curr_node);
			curr_node = NULL;
			btrfs_abort_transaction(trans, ret);
			break;
		}

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		/*
		 * See the comment below about releasing path before releasing
		 * node. If the commit of delayed items was successful the path
		 * should always be released, but in case of an error, it may
		 * point to locked extent buffers (a leaf at the very least).
		 */
		ASSERT(path->nodes[0] == NULL);
		btrfs_release_delayed_node(prev_node);
	}

	/*
	 * Release the path to avoid a potential deadlock and lockdep splat when
	 * releasing the delayed node, as that requires taking the delayed node's
	 * mutex. If another task starts running delayed items before we take
	 * the mutex, it will first lock the mutex and then it may try to lock
	 * the same btree path (leaf).
	 */
	btrfs_free_path(path);

	if (curr_node)
		btrfs_release_delayed_node(curr_node);
	btrfs_free_path(path);
	trans->block_rsv = block_rsv;

	return ret;
@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		return;

	/*
	 * Adding the new dir index item does not require touching another
	 * leaf, so we can release 1 unit of metadata that was previously
	 * reserved when starting the transaction. This applies only to
	 * the case where we had a transaction start and excludes the
	 * transaction join case (when replaying log trees).
	 */
	trace_btrfs_space_reservation(fs_info, "transaction",
				      trans->transid, bytes, 0);
	btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
	ASSERT(trans->bytes_reserved >= bytes);
	trans->bytes_reserved -= bytes;
}

/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   const char *name, int name_len,
				   struct btrfs_inode *dir,
@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,

	mutex_lock(&delayed_node->mutex);

	/*
	 * First attempt to insert the delayed item. This is to make the error
	 * handling path simpler in case we fail (-EEXIST). There's no risk of
	 * any other task coming in and running the delayed item before we do
	 * the metadata space reservation below, because we are holding the
	 * delayed node's mutex and that mutex must also be locked before the
	 * node's delayed items can be run.
	 */
	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(trans->fs_info,
"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
			  name_len, name, index, btrfs_root_id(delayed_node->root),
			  delayed_node->inode_id, dir->index_cnt,
			  delayed_node->index_cnt, ret);
		btrfs_release_delayed_item(delayed_item);
		btrfs_release_dir_index_item_space(trans);
		mutex_unlock(&delayed_node->mutex);
		goto release_node;
	}

	if (delayed_node->index_item_leaves == 0 ||
	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
		delayed_node->curr_index_batch_size = data_len;
@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
		 * impossible.
		 */
		if (WARN_ON(ret)) {
			mutex_unlock(&delayed_node->mutex);
			btrfs_release_delayed_item(delayed_item);
			mutex_unlock(&delayed_node->mutex);
			goto release_node;
		}

		delayed_node->index_item_leaves++;
	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

		/*
		 * Adding the new dir index item does not require touching another
		 * leaf, so we can release 1 unit of metadata that was previously
		 * reserved when starting the transaction. This applies only to
		 * the case where we had a transaction start and excludes the
		 * transaction join case (when replaying log trees).
		 */
		trace_btrfs_space_reservation(fs_info, "transaction",
					      trans->transid, bytes, 0);
		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
		ASSERT(trans->bytes_reserved >= bytes);
		trans->bytes_reserved -= bytes;
	}

	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(trans->fs_info,
			  "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
			  name_len, name, delayed_node->root->root_key.objectid,
			  delayed_node->inode_id, ret);
		BUG();
	} else {
		btrfs_release_dir_index_item_space(trans);
	}
	mutex_unlock(&delayed_node->mutex);

Loading