Commit 2dd7e7bc authored by Naohiro Aota's avatar Naohiro Aota Committed by David Sterba
Browse files

btrfs: zoned: wait for extent buffer IOs before finishing a zone



Before sending REQ_OP_ZONE_FINISH to a zone, we need to ensure that
ongoing IOs already finished. Or, we will see a "Zone Is Full" error for
the IOs, as the ZONE_FINISH command makes the zone full.

We ensure that with btrfs_wait_block_group_reservations() and
btrfs_wait_ordered_roots() for a data block group. And, for a metadata
block group, the comparison of alloc_offset vs meta_write_pointer mostly
ensures IOs for the allocated region already sent. However, there still
can be a little time frame where the IOs are sent but not yet completed.

Introduce wait_eb_writebacks() to ensure such IOs are completed for a
metadata block group. It walks the buffer_radix to find extent buffers in
the block group and calls wait_on_extent_buffer_writeback() on them.

Fixes: afba2bc0 ("btrfs: zoned: implement active zone tracking")
CC: stable@vger.kernel.org # 5.19+
Signed-off-by: default avatarNaohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent a362bb86
Loading
Loading
Loading
Loading
+38 −2
Original line number Diff line number Diff line
@@ -1918,10 +1918,44 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
	return ret;
}

static void wait_eb_writebacks(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	const u64 end = block_group->start + block_group->length;
	struct radix_tree_iter iter;
	struct extent_buffer *eb;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
				 block_group->start >> fs_info->sectorsize_bits) {
		eb = radix_tree_deref_slot(slot);
		if (!eb)
			continue;
		if (radix_tree_deref_retry(eb)) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}

		if (eb->start < block_group->start)
			continue;
		if (eb->start >= end)
			break;

		slot = radix_tree_iter_resume(slot, &iter);
		rcu_read_unlock();
		wait_on_extent_buffer_writeback(eb);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct map_lookup *map;
	const bool is_metadata = (block_group->flags &
			(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
	int ret = 0;
	int i;

@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
	}

	/* Check if we have unwritten allocated space */
	if ((block_group->flags &
	     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
	if (is_metadata &&
	    block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
		spin_unlock(&block_group->lock);
		return -EAGAIN;
@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
		/* No need to wait for NOCOW writers. Zoned mode does not allow that */
		btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
					 block_group->length);
		/* Wait for extent buffers to be written. */
		if (is_metadata)
			wait_eb_writebacks(block_group);

		spin_lock(&block_group->lock);