Commit d7172f52 authored by Christoph Hellwig, committed by David Sterba
Browse files

btrfs: use per-buffer locking for extent_buffer reading



Instead of locking and unlocking every page or the extent, just add a
new EXTENT_BUFFER_READING bit that mirrors EXTENT_BUFFER_WRITEBACK
for synchronizing threads trying to read an extent_buffer and to wait
for I/O completion.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 9e2aff90
Loading
Loading
Loading
Loading
+35 −115
Original line number Diff line number Diff line
@@ -4049,6 +4049,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
{
	struct extent_buffer *eb = bbio->private;
	struct btrfs_fs_info *fs_info = eb->fs_info;
	bool uptodate = !bbio->bio.bi_status;
	struct bvec_iter_all iter_all;
	struct bio_vec *bvec;
@@ -4068,26 +4069,47 @@ static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
	}

	bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
		end_page_read(bvec->bv_page, uptodate, eb->start + bio_offset,
			      bvec->bv_len);
		bio_offset += bvec->bv_len;
	}
		u64 start = eb->start + bio_offset;
		struct page *page = bvec->bv_page;
		u32 len = bvec->bv_len;

	if (eb->fs_info->nodesize < PAGE_SIZE) {
		unlock_extent(&bbio->inode->io_tree, eb->start,
			      eb->start + bio_offset - 1, NULL);
		if (uptodate)
			btrfs_page_set_uptodate(fs_info, page, start, len);
		else
			btrfs_page_clear_uptodate(fs_info, page, start, len);

		bio_offset += len;
	}

	clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
	smp_mb__after_atomic();
	wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
	free_extent_buffer(eb);

	bio_put(&bbio->bio);
}

static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
			     struct btrfs_tree_parent_check *check)
{
	int num_pages = num_extent_pages(eb), i;
	struct btrfs_bio *bbio;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	/*
	 * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
	 * operation, which could potentially still be in flight.  In this case
	 * we simply want to return an error.
	 */
	if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
		return -EIO;

	/* Someone else is already reading the buffer, just wait for it. */
	if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
		goto done;

	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	eb->read_mirror = 0;
	check_buffer_tree_ref(eb);
@@ -4108,116 +4130,14 @@ static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
			__bio_add_page(&bbio->bio, eb->pages[i], PAGE_SIZE, 0);
	}
	btrfs_submit_bio(bbio, mirror_num);
}

static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
				      int mirror_num,
				      struct btrfs_tree_parent_check *check)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	struct extent_io_tree *io_tree;
	struct page *page = eb->pages[0];
	struct extent_state *cached_state = NULL;
	int ret;

	ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
	ASSERT(PagePrivate(page));
	ASSERT(check);
	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;

	if (wait == WAIT_NONE) {
		if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
				     &cached_state))
			return -EAGAIN;
	} else {
		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
				  &cached_state);
		if (ret < 0)
			return ret;
	}

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) {
		unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
			      &cached_state);
		return 0;
	}

	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);

	__read_extent_buffer_pages(eb, mirror_num, check);
	if (wait != WAIT_COMPLETE) {
		free_extent_state(cached_state);
		return 0;
	}

	wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
			EXTENT_LOCKED, &cached_state);
done:
	if (wait == WAIT_COMPLETE) {
		wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
		if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
			return -EIO;
	return 0;
	}

int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
			     struct btrfs_tree_parent_check *check)
{
	int i;
	struct page *page;
	int locked_pages = 0;
	int num_pages;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	/*
	 * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
	 * operation, which could potentially still be in flight.  In this case
	 * we simply want to return an error.
	 */
	if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
		return -EIO;

	if (eb->fs_info->nodesize < PAGE_SIZE)
		return read_extent_buffer_subpage(eb, wait, mirror_num, check);

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (wait == WAIT_NONE) {
			/*
			 * WAIT_NONE is only utilized by readahead. If we can't
			 * acquire the lock atomically it means either the eb
			 * is being read out or under modification.
			 * Either way the eb will be or has been cached,
			 * readahead can exit safely.
			 */
			if (!trylock_page(page))
				goto unlock_exit;
		} else {
			lock_page(page);
		}
		locked_pages++;
	}

	__read_extent_buffer_pages(eb, mirror_num, check);

	if (wait != WAIT_COMPLETE)
		return 0;

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			return -EIO;
	}

	return 0;

unlock_exit:
	while (locked_pages > 0) {
		locked_pages--;
		page = eb->pages[locked_pages];
		unlock_page(page);
	}
	return 0;
}

+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ enum {
	/* write IO error */
	EXTENT_BUFFER_WRITE_ERR,
	EXTENT_BUFFER_NO_CHECK,
	/* Indicate that extent buffer pages are being read */
	EXTENT_BUFFER_READING,
};

/* these are flags for __process_pages_contig */