Commit 71c061d2 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:
 "A few more fixes that arrived before the end of the year:

   - a bunch of fixes related to transaction handle lifetime wrt various
     operations (umount, remount, qgroup scan, orphan cleanup)

   - async discard scheduling fixes

   - fix item size calculation when item keys collide for extend refs
     (hardlinks)

   - fix qgroup flushing from running transaction

   - fix send, wrong file path when there is an inode with a pending
     rmdir

   - fix deadlock when cloning inline extent and low on free metadata
     space"

* tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: run delayed iputs when remounting RO to avoid leaking them
  btrfs: add assertion for empty list of transactions at late stage of umount
  btrfs: fix race between RO remount and the cleaner task
  btrfs: fix transaction leak and crash after cleaning up orphans on RO mount
  btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan
  btrfs: merge critical sections of discard lock in workfn
  btrfs: fix racy access to discard_ctl data
  btrfs: fix async discard stall
  btrfs: tests: initialize test inodes location
  btrfs: send: fix wrong file path when there is an inode with a pending rmdir
  btrfs: qgroup: don't try to wait flushing if we're already holding a transaction
  btrfs: correctly calculate item size used when item key collision happens
  btrfs: fix deadlock when cloning inline extent and low on free metadata space
parents 9f1abbe9 a8cc263e
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -42,6 +42,15 @@ enum {
	 * to an inode.
	 */
	BTRFS_INODE_NO_XATTRS,
	/*
	 * Set when we are in a context where we need to start a transaction and
	 * have dirty pages with the respective file range locked. This is to
	 * ensure that when reserving space for the transaction, if we are low
	 * on available space and need to flush delalloc, we will not flush
	 * delalloc for this inode, because that could result in a deadlock (on
	 * the file range, inode's io_tree).
	 */
	BTRFS_INODE_NO_DELALLOC_FLUSH,
};

/* in memory btrfs inode */
+22 −2
Original line number Diff line number Diff line
@@ -2555,8 +2555,14 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
 * @p:		Holds all btree nodes along the search path
 * @root:	The root node of the tree
 * @key:	The key we are looking for
 * @ins_len:	Indicates purpose of search, for inserts it is 1, for
 *		deletions it's -1. 0 for plain searches
 * @ins_len:	Indicates purpose of search:
 *              >0  for inserts it's size of item inserted (*)
 *              <0  for deletions
 *               0  for plain searches, not modifying the tree
 *
 *              (*) If size of item inserted doesn't include
 *              sizeof(struct btrfs_item), then p->search_for_extension must
 *              be set.
 * @cow:	boolean should CoW operations be performed. Must always be 1
 *		when modifying the tree.
 *
@@ -2717,6 +2723,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,

		if (level == 0) {
			p->slots[level] = slot;
			/*
			 * Item key already exists. In this case, if we are
			 * allowed to insert the item (for example, in dir_item
			 * case, item key collision is allowed), it will be
			 * merged with the original item. Only the item size
			 * grows, no new btrfs item will be added. If
			 * search_for_extension is not set, ins_len already
			 * accounts the size btrfs_item, deduct it here so leaf
			 * space check will be correct.
			 */
			if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
				ASSERT(ins_len >= sizeof(struct btrfs_item));
				ins_len -= sizeof(struct btrfs_item);
			}
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(b) < ins_len) {
				if (write_lock_level < 1) {
+27 −2
Original line number Diff line number Diff line
@@ -131,6 +131,8 @@ enum {
	 * defrag
	 */
	BTRFS_FS_STATE_REMOUNTING,
	/* Filesystem in RO mode */
	BTRFS_FS_STATE_RO,
	/* Track if a transaction abort has been reported on this filesystem */
	BTRFS_FS_STATE_TRANS_ABORTED,
	/*
@@ -367,6 +369,12 @@ struct btrfs_path {
	unsigned int search_commit_root:1;
	unsigned int need_commit_sem:1;
	unsigned int skip_release_on_error:1;
	/*
	 * Indicate that new item (btrfs_search_slot) is extending already
	 * existing item and ins_len contains only the data size and not item
	 * header (ie. sizeof(struct btrfs_item) is not included).
	 */
	unsigned int search_for_extension:1;
};
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
					sizeof(struct btrfs_item))
@@ -2885,10 +2893,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 * If we remount the fs to be R/O or umount the fs, the cleaner needn't do
 * anything except sleeping. This function is used to check the status of
 * the fs.
 * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
 * since setting and checking for SB_RDONLY in the superblock's flags is not
 * atomic.
 */
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
{
	return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
	return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
		btrfs_fs_closing(fs_info);
}

static inline void btrfs_set_sb_rdonly(struct super_block *sb)
{
	sb->s_flags |= SB_RDONLY;
	set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}

static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
{
	sb->s_flags &= ~SB_RDONLY;
	clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}

/* tree mod log functions from ctree.c */
@@ -3073,7 +3097,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       u32 min_type);

int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
			       bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state);
+1 −1
Original line number Diff line number Diff line
@@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
	 * flush all outstanding I/O and inode extent mappings before the
	 * copy operation is declared as being finished
	 */
	ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
	ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
	if (ret) {
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return ret;
+36 −34
Original line number Diff line number Diff line
@@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group(
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index)
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now > block_group->discard_eligible_time) {
	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group))
@@ -222,12 +221,11 @@ static struct btrfs_block_group *peek_discard_list(
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	} else {
		block_group = NULL;
	}

	spin_unlock(&discard_ctl->lock);

	return block_group;
@@ -330,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
		btrfs_discard_schedule_work(discard_ctl, false);
}

/**
 * btrfs_discard_schedule_work - responsible for scheduling the discard work
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item.  @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit.  This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);

	if (!btrfs_run_discard_work(discard_ctl))
		goto out;

		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		goto out;
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
@@ -393,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
out:
}

/*
 * btrfs_discard_schedule_work - responsible for scheduling the discard work
 * @discard_ctl:  discard control
 * @override:     override the current timer
 *
 * Discards are issued by a delayed workqueue item.  @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit.  This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

@@ -438,13 +440,18 @@ static void btrfs_discard_workfn(struct work_struct *work)
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index);
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];
@@ -474,13 +481,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/*
	 * Updated without locks as this is inside the workfn and nothing else
	 * is reading the values
	 */
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = ktime_get_ns();

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
@@ -496,11 +496,13 @@ static void btrfs_discard_workfn(struct work_struct *work)
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);

	btrfs_discard_schedule_work(discard_ctl, false);
}

/**
Loading