Commit 46670259 authored by Linus Torvalds
Pull btrfs fixes from David Sterba:
 "Stable fixes:

   - fix race between balance and cancel/pause

   - various iput() fixes

   - fix use-after-free of new block group that became unused

   - fix warning when putting transaction with qgroups enabled after
     abort

   - fix crash in subpage mode when a page could be released between
     mapping it and reading it back

   - when scrubbing raid56 verify the P/Q stripes unconditionally

   - fix minor memory leak in zoned mode when a block group with an
     unexpected superblock is found

  Regression fixes:

   - fix ordered extent split error handling when submitting direct IO

   - use irq-safe locking when adding delayed iputs"

* tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix warning when putting transaction with qgroups enabled after abort
  btrfs: fix ordered extent split error handling in btrfs_dio_submit_io
  btrfs: set_page_extent_mapped after read_folio in btrfs_cont_expand
  btrfs: raid56: always verify the P/Q contents for scrub
  btrfs: use irq safe locking when running and adding delayed iputs
  btrfs: fix iput() on error pointer after error during orphan cleanup
  btrfs: fix double iput() on inode after an error during orphan cleanup
  btrfs: zoned: fix memory leak after finding block group with super blocks
  btrfs: fix use-after-free of new block group that became unused
  btrfs: be a bit more careful when setting mirror_num_ret in btrfs_map_block
  btrfs: fix race between balance and cancel/pause
parents 2922800a aa84ce8a
fs/btrfs/block-group.c (+12 −2)
@@ -1640,13 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
 
-	trace_btrfs_add_unused_block_group(bg);
 	spin_lock(&fs_info->unused_bgs_lock);
 	if (list_empty(&bg->bg_list)) {
 		btrfs_get_block_group(bg);
+		trace_btrfs_add_unused_block_group(bg);
 		list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
-	} else {
+	} else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
 		/* Pull out the block group from the reclaim_bgs list. */
+		trace_btrfs_add_unused_block_group(bg);
 		list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
@@ -2087,6 +2088,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)

 		/* Shouldn't have super stripes in sequential zones */
 		if (zoned && nr) {
+			kfree(logical);
 			btrfs_err(fs_info,
 			"zoned: block group %llu must not contain super block",
 				  cache->start);
@@ -2668,6 +2670,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 next:
 		btrfs_delayed_refs_rsv_release(fs_info, 1);
 		list_del_init(&block_group->bg_list);
+		clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
 	}
 	btrfs_trans_release_chunk_metadata(trans);
 }
@@ -2707,6 +2710,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 	if (!cache)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Mark it as new before adding it to the rbtree of block groups or any
+	 * list, so that no other task finds it and calls btrfs_mark_bg_unused()
+	 * before the new flag is set.
+	 */
+	set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
+
 	cache->length = size;
 	set_free_space_tree_thresholds(cache);
 	cache->flags = type;
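
The use-after-free fix above hinges on a publication-order rule: the creator sets BLOCK_GROUP_FLAG_NEW (declared in block-group.h below) before the block group becomes visible to other tasks, and btrfs_mark_bg_unused() refuses to move a still-new group off its transaction's list. A minimal sketch of that pattern, with hypothetical names standing in for the btrfs structures:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/bitops.h>

	enum { FLAG_NEW };		/* stands in for BLOCK_GROUP_FLAG_NEW */

	struct group {
		unsigned long flags;
		struct list_head bg_list;	/* unused or reclaim list */
	};

	static LIST_HEAD(unused_list);
	static DEFINE_SPINLOCK(unused_lock);

	/* Creator: mark the object as new *before* publishing it. */
	static void publish_group(struct group *g)
	{
		set_bit(FLAG_NEW, &g->flags);
		/* ...now insert g where other tasks can find it... */
	}

	/* Concurrent path: never steal a new group from its creator's list. */
	static void mark_group_unused(struct group *g)
	{
		spin_lock(&unused_lock);
		if (list_empty(&g->bg_list))
			list_add_tail(&g->bg_list, &unused_list);
		else if (!test_bit(FLAG_NEW, &g->flags))
			list_move_tail(&g->bg_list, &unused_list);
		spin_unlock(&unused_lock);
	}

Without the test_bit() guard, list_move_tail() could pull a not-yet-committed group off the transaction's new_bgs list, letting the cleaner free it while btrfs_create_pending_block_groups() still dereferences it.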
fs/btrfs/block-group.h (+5 −0)
@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
 	BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
 	/* Indicate that the block group is placed on a sequential zone */
 	BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
+	/*
+	 * Indicate that block group is in the list of new block groups of a
+	 * transaction.
+	 */
+	BLOCK_GROUP_FLAG_NEW,
 };
 
 enum btrfs_caching_type {
fs/btrfs/inode.c (+52 −25)
@@ -3482,15 +3482,21 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
 void btrfs_add_delayed_iput(struct btrfs_inode *inode)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	unsigned long flags;
 
 	if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
 		return;
 
 	atomic_inc(&fs_info->nr_delayed_iputs);
-	spin_lock(&fs_info->delayed_iput_lock);
+	/*
+	 * Need to be irq safe here because we can be called from either an irq
+	 * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
+	 * context.
+	 */
+	spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
 	ASSERT(list_empty(&inode->delayed_iput));
 	list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
-	spin_unlock(&fs_info->delayed_iput_lock);
+	spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
 	if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
 		wake_up_process(fs_info->cleaner_kthread);
 }
@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
 				    struct btrfs_inode *inode)
 {
 	list_del_init(&inode->delayed_iput);
-	spin_unlock(&fs_info->delayed_iput_lock);
+	spin_unlock_irq(&fs_info->delayed_iput_lock);
 	iput(&inode->vfs_inode);
 	if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
 		wake_up(&fs_info->delayed_iputs_wait);
-	spin_lock(&fs_info->delayed_iput_lock);
+	spin_lock_irq(&fs_info->delayed_iput_lock);
 }
 
 static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
 				   struct btrfs_inode *inode)
 {
 	if (!list_empty(&inode->delayed_iput)) {
-		spin_lock(&fs_info->delayed_iput_lock);
+		spin_lock_irq(&fs_info->delayed_iput_lock);
 		if (!list_empty(&inode->delayed_iput))
 			run_delayed_iput_locked(fs_info, inode);
-		spin_unlock(&fs_info->delayed_iput_lock);
+		spin_unlock_irq(&fs_info->delayed_iput_lock);
 	}
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 {
 
-	spin_lock(&fs_info->delayed_iput_lock);
+	/*
+	 * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
+	 * calls btrfs_add_delayed_iput() and that needs to lock
+	 * fs_info->delayed_iput_lock. So we need to disable irqs here to
+	 * prevent a deadlock.
+	 */
+	spin_lock_irq(&fs_info->delayed_iput_lock);
 	while (!list_empty(&fs_info->delayed_iputs)) {
 		struct btrfs_inode *inode;
 
 		inode = list_first_entry(&fs_info->delayed_iputs,
 				struct btrfs_inode, delayed_iput);
 		run_delayed_iput_locked(fs_info, inode);
-		cond_resched_lock(&fs_info->delayed_iput_lock);
+		if (need_resched()) {
+			spin_unlock_irq(&fs_info->delayed_iput_lock);
+			cond_resched();
+			spin_lock_irq(&fs_info->delayed_iput_lock);
+		}
 	}
-	spin_unlock(&fs_info->delayed_iput_lock);
+	spin_unlock_irq(&fs_info->delayed_iput_lock);
 }
 
 /*
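
The rule these hunks implement: once a spinlock can be taken from irq context (btrfs_add_delayed_iput() runs from bio completion), every process-context holder must disable irqs, otherwise an interrupt on the same CPU can try to re-take the held lock and deadlock. A self-contained sketch of the pattern, with hypothetical names:

	#include <linux/spinlock.h>
	#include <linux/list.h>
	#include <linux/sched.h>

	static DEFINE_SPINLOCK(pending_lock);
	static LIST_HEAD(pending_list);

	/* May run in irq context: must save/restore the irq state. */
	void add_pending(struct list_head *entry)
	{
		unsigned long flags;

		spin_lock_irqsave(&pending_lock, flags);
		list_add_tail(entry, &pending_list);
		spin_unlock_irqrestore(&pending_lock, flags);
	}

	/* Runs only in process context: the plain _irq variants suffice. */
	void drain_pending(void (*process)(struct list_head *))
	{
		spin_lock_irq(&pending_lock);
		while (!list_empty(&pending_list)) {
			struct list_head *entry = pending_list.next;

			list_del_init(entry);
			/* Drop the lock (re-enabling irqs) for the real work. */
			spin_unlock_irq(&pending_lock);
			process(entry);
			cond_resched();
			spin_lock_irq(&pending_lock);
		}
		spin_unlock_irq(&pending_lock);
	}

This is also why the patch open-codes the resched check: cond_resched_lock() would reschedule with irqs still disabled, so the lock must be dropped and irqs re-enabled around cond_resched().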
@@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		found_key.type = BTRFS_INODE_ITEM_KEY;
 		found_key.offset = 0;
 		inode = btrfs_iget(fs_info->sb, last_objectid, root);
-		ret = PTR_ERR_OR_ZERO(inode);
-		if (ret && ret != -ENOENT)
-			goto out;
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
+			inode = NULL;
+			if (ret != -ENOENT)
+				goto out;
+		}
 
-		if (ret == -ENOENT && root == fs_info->tree_root) {
+		if (!inode && root == fs_info->tree_root) {
 			struct btrfs_root *dead_root;
 			int is_dead_root = 0;
 
@@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		 * deleted but wasn't. The inode number may have been reused,
 		 * but either way, we can delete the orphan item.
 		 */
-		if (ret == -ENOENT || inode->i_nlink) {
-			if (!ret) {
+		if (!inode || inode->i_nlink) {
+			if (inode) {
 				ret = btrfs_drop_verity_items(BTRFS_I(inode));
 				iput(inode);
+				inode = NULL;
 				if (ret)
 					goto out;
 			}
 			trans = btrfs_start_transaction(root, 1);
 			if (IS_ERR(trans)) {
 				ret = PTR_ERR(trans);
-				iput(inode);
 				goto out;
 			}
 			btrfs_debug(fs_info, "auto deleting %Lu",
@@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 			ret = btrfs_del_orphan_item(trans, root,
 						    found_key.objectid);
 			btrfs_end_transaction(trans);
-			if (ret) {
-				iput(inode);
+			if (ret)
 				goto out;
-			}
 			continue;
 		}
 
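Both orphan-cleanup fixes enforce the same two iput() rules, condensed in this sketch (lookup_inode() is a hypothetical stand-in for btrfs_iget()):

	#include <linux/fs.h>
	#include <linux/err.h>
	#include <linux/errno.h>

	struct inode *lookup_inode(struct super_block *sb, u64 objectid);	/* hypothetical */

	static int handle_orphan(struct super_block *sb, u64 objectid)
	{
		struct inode *inode;
		int ret = 0;

		inode = lookup_inode(sb, objectid);	/* returns ERR_PTR on error */
		if (IS_ERR(inode)) {
			ret = PTR_ERR(inode);
			inode = NULL;		/* rule 1: never iput() an ERR_PTR */
			if (ret != -ENOENT)
				return ret;
		}

		if (!inode || inode->i_nlink) {
			if (inode) {
				iput(inode);
				inode = NULL;	/* rule 2: clear after iput() so no
						 * later error path drops it again */
			}
			/* ...start a transaction and delete the orphan item;
			 * error paths here must not touch inode... */
			return ret;
		}

		/* ...process the orphan inode... */
		iput(inode);
		return ret;
	}

The first fix covers rule 1 (inode held an ERR_PTR that later error paths passed to iput()); the second covers rule 2 (the transaction-error path dropped a reference that had already been dropped).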
@@ -4847,9 +4863,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 		ret = -ENOMEM;
 		goto out;
 	}
-	ret = set_page_extent_mapped(page);
-	if (ret < 0)
-		goto out_unlock;
 
 	if (!PageUptodate(page)) {
 		ret = btrfs_read_folio(NULL, page_folio(page));
@@ -4864,6 +4877,17 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 			goto out_unlock;
 		}
 	}
+
+	/*
+	 * We unlock the page after the io is completed and then re-lock it
+	 * above.  release_folio() could have come in between that and cleared
+	 * PagePrivate(), but left the page in the mapping.  Set the page mapped
+	 * here to make sure it's properly set for the subpage stuff.
+	 */
+	ret = set_page_extent_mapped(page);
+	if (ret < 0)
+		goto out_unlock;
+
 	wait_on_page_writeback(page);
 
 	lock_extent(io_tree, block_start, block_end, &cached_state);
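
The btrfs_truncate_block() change is purely an ordering fix, as the new comment explains: btrfs_read_folio() unlocks the page during I/O, and release_folio() can strip the page's private state in that window, so set_page_extent_mapped() must run after the read, not before. In outline, with hypothetical helpers standing in for the btrfs ones:

	#include <linux/pagemap.h>

	int read_and_relock(struct page *page);		/* hypothetical: unlocks during I/O */
	int attach_page_private_state(struct page *page);	/* hypothetical */

	static int prepare_locked_page(struct page *page)
	{
		int ret;

		if (!PageUptodate(page)) {
			ret = read_and_relock(page);
			if (ret)
				return ret;
		}

		/*
		 * Only now attach the private state: while the page was
		 * unlocked for the read, release_folio() may have cleared it.
		 */
		return attach_page_private_state(page);
	}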
@@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,

 		ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
 		if (ret) {
-			bbio->bio.bi_status = errno_to_blk_status(ret);
-			btrfs_dio_end_io(bbio);
+			btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+						    file_offset, dip->bytes,
+						    !ret);
+			bio->bi_status = errno_to_blk_status(ret);
+			iomap_dio_bio_end_io(bio);
 			return;
 		}
 	}
fs/btrfs/qgroup.c (+1 −0)
@@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
 		ulist_free(entry->old_roots);
 		kfree(entry);
 	}
+	*root = RB_ROOT;
 }
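
The one-line qgroup fix addresses a classic rbtree teardown bug: rbtree_postorder_for_each_entry_safe() frees every node but leaves root->rb_node pointing at freed memory, so the root must be reset before the tree is looked at again (here, when the aborted transaction is put and the warning fired). The general pattern, with an illustrative record type:

	#include <linux/rbtree.h>
	#include <linux/slab.h>

	struct record {
		struct rb_node node;
		/* ...payload... */
	};

	static void destroy_records(struct rb_root *root)
	{
		struct record *entry, *next;

		/* Postorder walk: children are visited before their parent,
		 * so freeing entries during iteration is safe. */
		rbtree_postorder_for_each_entry_safe(entry, next, root, node)
			kfree(entry);

		*root = RB_ROOT;	/* the fix: leave an empty, valid tree */
	}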
fs/btrfs/raid56.c (+3 −8)
@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
 static void scrub_rbio_work_locked(struct work_struct *work);
 
 static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
 	return 0;
 }
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
 {
 	struct btrfs_io_context *bioc = rbio->bioc;
 	const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
 	 */
 	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-	if (!need_check)
-		goto writeback;
-
 	p_sector.page = alloc_page(GFP_NOFS);
 	if (!p_sector.page)
 		return -ENOMEM;
@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
 		q_sector.page = NULL;
 	}
 
-writeback:
 	/*
 	 * time to start writing.  Make bios for everything from the
 	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)

 static void scrub_rbio(struct btrfs_raid_bio *rbio)
 {
-	bool need_check = false;
 	int sector_nr;
 	int ret;

@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
 	 * We have every sector properly prepared. Can finish the scrub
 	 * and writeback the good content.
 	 */
-	ret = finish_parity_scrub(rbio, need_check);
+	ret = finish_parity_scrub(rbio);
 	wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
 	for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
 		int found_errors;
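
The raid56 change removes a need_check flag that no caller ever set to true, which had quietly turned the P/Q verification in finish_parity_scrub() into dead code; with the parameter gone, scrub always recomputes parity and compares it with what is on disk before writing back. The essence of that verification, sketched with hypothetical helpers around the real xor_blocks() primitive:

	#include <linux/raid/xor.h>
	#include <linux/string.h>
	#include <linux/errno.h>

	/* Recompute P from the data stripes and compare with on-disk P. */
	static int verify_p_stripe(void **data, int nr_data, void *ondisk_p,
				   void *scratch, unsigned int len)
	{
		memcpy(scratch, data[0], len);
		for (int i = 1; i < nr_data; i++) {
			void *src[1] = { data[i] };

			/* XOR one source buffer into the running parity. */
			xor_blocks(1, len, scratch, src);
		}

		return memcmp(scratch, ondisk_p, len) ? -EIO : 0;
	}

A mismatch means the on-disk parity is stale and must be rewritten, which is exactly what the now-unconditional path does.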