Commit 56fbb0a4 authored by Naohiro Aota's avatar Naohiro Aota Committed by David Sterba
Browse files

btrfs: zoned: properly finish block group on metadata write



Commit be1a1d7a ("btrfs: zoned: finish fully written block group")
introduced zone finishing code both for data and metadata end_io path.
However, the metadata side is not working as it should. First, it
compares logical address (eb->start + eb->len) with offset within a
block group (cache->zone_capacity) in submit_eb_page(). That essentially
disabled zone finishing on metadata end_io path.

Furthermore, fixing the issue above revealed we cannot call
btrfs_zone_finish_endio() in end_extent_buffer_writeback(). We cannot
call btrfs_lookup_block_group() which require spin lock inside end_io
context.

Introduce btrfs_schedule_zone_finish_bg() to wait for the extent buffer
writeback and do the zone finish IO in a workqueue.

Also, drop EXTENT_BUFFER_ZONE_FINISH as it is no longer used.

Fixes: be1a1d7a ("btrfs: zoned: finish fully written block group")
CC: stable@vger.kernel.org # 5.16+
Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: default avatarNaohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 8b8a5399
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -212,6 +212,8 @@ struct btrfs_block_group {
	u64 meta_write_pointer;
	struct map_lookup *physical_map;
	struct list_head active_bg_list;
	struct work_struct zone_finish_work;
	struct extent_buffer *last_eb;
};

static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
+1 −5
Original line number Diff line number Diff line
@@ -4251,9 +4251,6 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)

static void end_extent_buffer_writeback(struct extent_buffer *eb)
{
	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);

	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
	smp_mb__after_atomic();
	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4843,8 +4840,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
		/*
		 * Implies write in zoned mode. Mark the last eb in a block group.
		 */
		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
		btrfs_schedule_zone_finish_bg(cache, eb);
		btrfs_put_block_group(cache);
	}
	ret = write_one_eb(eb, wbc, epd);
+0 −1
Original line number Diff line number Diff line
@@ -26,7 +26,6 @@ enum {
	/* write IO error */
	EXTENT_BUFFER_WRITE_ERR,
	EXTENT_BUFFER_NO_CHECK,
	EXTENT_BUFFER_ZONE_FINISH,
};

/* these are flags for __process_pages_contig */
+31 −0
Original line number Diff line number Diff line
@@ -2046,6 +2046,37 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
	btrfs_put_block_group(block_group);
}

static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
{
	struct btrfs_block_group *bg =
		container_of(work, struct btrfs_block_group, zone_finish_work);

	wait_on_extent_buffer_writeback(bg->last_eb);
	free_extent_buffer(bg->last_eb);
	btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length);
	btrfs_put_block_group(bg);
}

void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
				   struct extent_buffer *eb)
{
	if (!bg->seq_zone || eb->start + eb->len * 2 <= bg->start + bg->zone_capacity)
		return;

	if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) {
		btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing",
			  bg->start);
		return;
	}

	/* For the work */
	btrfs_get_block_group(bg);
	atomic_inc(&eb->refs);
	bg->last_eb = eb;
	INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
	queue_work(system_unbound_wq, &bg->zone_finish_work);
}

void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
{
	struct btrfs_fs_info *fs_info = bg->fs_info;
+5 −0
Original line number Diff line number Diff line
@@ -72,6 +72,8 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group);
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
			     u64 length);
void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
				   struct extent_buffer *eb);
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
@@ -230,6 +232,9 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
					   u64 logical, u64 length) { }

static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
						 struct extent_buffer *eb) { }

static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }

static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }