Commit 046b562b authored by Christoph Hellwig, committed by David Sterba
Browse files

btrfs: use a separate end_io handler for read_extent_buffer



Now that we always use a single bio to read an extent_buffer, the buffer
can be passed to the end_io handler as private data.  This allows
implementing a much simplified dedicated end I/O handler for metadata
reads.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
parent e1949310
Loading
Loading
Loading
Loading
+2 −103
Original line number Diff line number Diff line
@@ -427,7 +427,7 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
}

/* Do basic extent buffer checks at read time */
static int validate_extent_buffer(struct extent_buffer *eb,
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
				 struct btrfs_tree_parent_check *check)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -541,107 +541,6 @@ static int validate_extent_buffer(struct extent_buffer *eb,
	return ret;
}

/*
 * Finish a metadata read when the tree block is smaller than a page.
 *
 * Looks up the extent buffer that starts at @start, drops its pending
 * io_pages count, records @mirror as the mirror that was read and then
 * validates the buffer against @check.
 *
 * Returns -EIO if the buffer already carries EXTENT_BUFFER_READ_ERR,
 * otherwise whatever validate_extent_buffer() returns.
 */
static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
				   int mirror, struct btrfs_tree_parent_check *check)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
	struct extent_buffer *eb;
	bool last_io;
	int ret;

	ASSERT(check);

	/*
	 * Bio merging is not allowed for subpage metadata reads, so one endio
	 * call must cover exactly one tree block.
	 */
	ASSERT(end == start + fs_info->nodesize - 1);
	ASSERT(PagePrivate(page));

	/*
	 * A tree block that is being read must already be in the radix tree;
	 * a failed lookup means something is badly wrong.
	 */
	eb = find_extent_buffer(fs_info, start);
	ASSERT(eb);

	/* A subpage read always completes within a single page read. */
	last_io = atomic_dec_and_test(&eb->io_pages);
	ASSERT(last_io);

	eb->read_mirror = mirror;
	if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags))
		ret = -EIO;
	else
		ret = validate_extent_buffer(eb, check);

	if (ret < 0) {
		/*
		 * end_bio_extent_readpage decrements io_pages again on error,
		 * so hand it back something to decrement.
		 */
		atomic_inc(&eb->io_pages);
		clear_extent_buffer_uptodate(eb);
	} else {
		set_extent_buffer_uptodate(eb);
	}

	free_extent_buffer(eb);
	return ret;
}

/*
 * Validate a metadata page once its read I/O has completed.
 *
 * When nodesize is smaller than PAGE_SIZE the work is delegated to
 * validate_subpage_buffer().  Otherwise the extent buffer is taken from
 * page->private, and the buffer is only validated when this page was the
 * last one with I/O pending on it.
 *
 * Returns 0 if reads are still pending or validation passed, -EIO if the
 * buffer is flagged EXTENT_BUFFER_READ_ERR, or the non-zero result of
 * validate_extent_buffer().
 */
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
				   struct page *page, u64 start, u64 end,
				   int mirror)
{
	struct extent_buffer *eb;
	int ret = 0;

	ASSERT(page->private);

	if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
		return validate_subpage_buffer(page, start, end, mirror,
					       &bbio->parent_check);

	eb = (struct extent_buffer *)page->private;

	/*
	 * The pending I/O may have been the only thing keeping this buffer in
	 * memory, so take our own reference for the checks below.
	 */
	atomic_inc(&eb->refs);

	/* Only the completion of the last outstanding page validates. */
	if (atomic_dec_and_test(&eb->io_pages)) {
		eb->read_mirror = mirror;
		if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags))
			ret = -EIO;
		else
			ret = validate_extent_buffer(eb, &bbio->parent_check);

		if (ret == 0)
			set_extent_buffer_uptodate(eb);
	}

	if (ret != 0) {
		/*
		 * The I/O error hook will decrement io_pages once more, so
		 * make sure there is something left for it to decrement.
		 */
		atomic_inc(&eb->io_pages);
		clear_extent_buffer_uptodate(eb);
	}

	free_extent_buffer(eb);
	return ret;
}

#ifdef CONFIG_MIGRATION
static int btree_migrate_folio(struct address_space *mapping,
		struct folio *dst, struct folio *src, enum migrate_mode mode)
+2 −3
Original line number Diff line number Diff line
@@ -82,9 +82,8 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
				 struct btrfs_root *root);
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
				   struct page *page, u64 start, u64 end,
				   int mirror);
/*
 * Do basic read-time checks of an extent buffer against @check.  The read
 * end_io handler treats a negative return value as a failed validation.
 */
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
				 struct btrfs_tree_parent_check *check);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif
+37 −43
Original line number Diff line number Diff line
@@ -663,35 +663,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
	btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
}

/*
 * Find the extent buffer for a given bytenr.
 *
 * Used by end_bio_extent_readpage(), which runs in endio context, so no
 * unsafe locking may be done here.
 */
static struct extent_buffer *find_extent_buffer_readpage(
		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
{
	struct extent_buffer *found;

	if (fs_info->nodesize < PAGE_SIZE) {
		/* Subpage case: look the buffer up in the buffer radix tree. */
		rcu_read_lock();
		found = radix_tree_lookup(&fs_info->buffer_radix,
					  bytenr >> fs_info->sectorsize_bits);
		rcu_read_unlock();
		ASSERT(found);
	} else {
		/*
		 * Regular sectorsize: the extent buffer can be grabbed
		 * straight from page->private.
		 */
		ASSERT(PagePrivate(page) && page->private);
		found = (struct extent_buffer *)page->private;
	}

	return found;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
@@ -713,7 +684,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
	 * larger than UINT_MAX, u32 here is enough.
	 */
	u32 bio_offset = 0;
	int mirror;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
@@ -753,11 +723,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
		end = start + bvec->bv_len - 1;
		len = bvec->bv_len;

		mirror = bbio->mirror_num;
		if (uptodate && !is_data_inode(inode) &&
		    btrfs_validate_metadata_buffer(bbio, page, start, end, mirror))
			uptodate = false;

		if (likely(uptodate)) {
			loff_t i_size = i_size_read(inode);
			pgoff_t end_index = i_size >> PAGE_SHIFT;
@@ -778,13 +743,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
				zero_user_segment(page, zero_start,
						  offset_in_page(end) + 1);
			}
		} else if (!is_data_inode(inode)) {
			struct extent_buffer *eb;

			eb = find_extent_buffer_readpage(fs_info, page, start);
			set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
			eb->read_mirror = mirror;
			atomic_dec(&eb->io_pages);
		}

		/* Update page status and unlock. */
@@ -4221,6 +4179,42 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
	}
}

/*
 * End I/O handler for extent buffer (metadata) reads.
 *
 * The extent buffer being read is passed in as bbio->private.  The handler
 * records the mirror that was read, validates the buffer if the bio finished
 * without error, marks the buffer uptodate or flags it with
 * EXTENT_BUFFER_READ_ERR, finishes the read on every page in the bio, unlocks
 * the covered range in the inode's io_tree and finally drops the bio.
 */
static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
{
	struct extent_buffer *eb = bbio->private;
	struct bvec_iter_all iter_all;
	struct bio_vec *seg;
	u32 done = 0;
	bool ok = !bbio->bio.bi_status;

	/* Hold our own reference on the buffer until the handler is finished. */
	atomic_inc(&eb->refs);
	eb->read_mirror = bbio->mirror_num;

	/* A read that completed cleanly still has to pass validation. */
	if (ok)
		ok = btrfs_validate_extent_buffer(eb, &bbio->parent_check) >= 0;

	if (!ok) {
		clear_extent_buffer_uptodate(eb);
		set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	} else {
		set_extent_buffer_uptodate(eb);
	}

	/* Walk the bio, completing each page at its offset within the buffer. */
	bio_for_each_segment_all(seg, &bbio->bio, iter_all) {
		atomic_dec(&eb->io_pages);
		end_page_read(seg->bv_page, ok, eb->start + done, seg->bv_len);
		done += seg->bv_len;
	}

	unlock_extent(&bbio->inode->io_tree, eb->start,
		      eb->start + done - 1, NULL);
	free_extent_buffer(eb);

	bio_put(&bbio->bio);
}

static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
				       struct btrfs_tree_parent_check *check)
{
@@ -4234,7 +4228,7 @@ static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,

	bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
			       REQ_OP_READ | REQ_META, eb->fs_info,
			       end_bio_extent_readpage, NULL);
			       extent_buffer_read_end_io, eb);
	bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
	bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
	bbio->file_offset = eb->start;