Commit 01cd3909 authored by David Sterba's avatar David Sterba
Browse files

Revert "btrfs: turn fs_info member buffer_radix into XArray"

This reverts commit 8ee92268.

Revert the xarray conversion, there's a problem with potential
sleep-inside-spinlock [1] when calling xa_insert that triggers GFP_NOFS
allocation. The radix tree used the preloading mechanism to avoid
sleeping but this is not available in xarray.

Conversion from spin lock to mutex is possible but at time of rc6 is
riskier than a clean revert.

[1] https://lore.kernel.org/linux-btrfs/cover.1657097693.git.fdmanana@suse.com/



Reported-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent fc7cbcd4
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -994,10 +994,10 @@ struct btrfs_fs_info {

	struct btrfs_delayed_root *delayed_root;

	/* Extent buffer xarray */
	/* Extent buffer radix tree */
	spinlock_t buffer_lock;
	/* Entries are eb->start / sectorsize */
	struct xarray extent_buffers;
	struct radix_tree_root buffer_radix;

	/* next backup root to be overwritten */
	int backup_root_index;
+2 −2
Original line number Diff line number Diff line
@@ -486,7 +486,7 @@ static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
		uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
						       fs_info->nodesize);

		/* A dirty eb shouldn't disappear from extent_buffers */
		/* A dirty eb shouldn't disappear from buffer_radix */
		if (WARN_ON(!eb))
			return -EUCLEAN;

@@ -3150,7 +3150,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
	xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->delayed_iputs);
+74 −48
Original line number Diff line number Diff line
@@ -2966,7 +2966,7 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
}

/*
 * Find extent buffer for a given bytenr.
 * Find extent buffer for a givne bytenr.
 *
 * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
 * in endio context.
@@ -2985,9 +2985,11 @@ static struct extent_buffer *find_extent_buffer_readpage(
		return (struct extent_buffer *)page->private;
	}

	/* For subpage case, we need to lookup extent buffer xarray */
	eb = xa_load(&fs_info->extent_buffers,
	/* For subpage case, we need to lookup buffer radix tree */
	rcu_read_lock();
	eb = radix_tree_lookup(&fs_info->buffer_radix,
			       bytenr >> fs_info->sectorsize_bits);
	rcu_read_unlock();
	ASSERT(eb);
	return eb;
}
@@ -4434,7 +4436,7 @@ static struct extent_buffer *find_extent_buffer_nolock(
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = xa_load(&fs_info->extent_buffers,
	eb = radix_tree_lookup(&fs_info->buffer_radix,
			       start >> fs_info->sectorsize_bits);
	if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
@@ -6128,22 +6130,24 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
	if (!eb)
		return ERR_PTR(-ENOMEM);
	eb->fs_info = fs_info;

	do {
		ret = xa_insert(&fs_info->extent_buffers,
				start >> fs_info->sectorsize_bits,
				eb, GFP_NOFS);
		if (ret == -ENOMEM) {
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}
		if (ret == -EBUSY) {
	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> fs_info->sectorsize_bits, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	} while (ret);

	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

@@ -6318,22 +6322,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);

	do {
		ret = xa_insert(&fs_info->extent_buffers,
				start >> fs_info->sectorsize_bits,
				eb, GFP_NOFS);
		if (ret == -ENOMEM) {
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}
		if (ret == -EBUSY) {

	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> fs_info->sectorsize_bits, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	} while (ret);

	/* add one reference for the tree */
	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -6378,8 +6385,10 @@ static int release_extent_buffer(struct extent_buffer *eb)

			spin_unlock(&eb->refs_lock);

			xa_erase(&fs_info->extent_buffers,
			spin_lock(&fs_info->buffer_lock);
			radix_tree_delete(&fs_info->buffer_radix,
					  eb->start >> fs_info->sectorsize_bits);
			spin_unlock(&fs_info->buffer_lock);
		} else {
			spin_unlock(&eb->refs_lock);
		}
@@ -7324,25 +7333,42 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
	}
}

#define GANG_LOOKUP_SIZE	16
static struct extent_buffer *get_next_extent_buffer(
		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
{
	struct extent_buffer *eb;
	unsigned long index;
	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
	struct extent_buffer *found = NULL;
	u64 page_start = page_offset(page);
	u64 cur = page_start;

	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
	lockdep_assert_held(&fs_info->buffer_lock);

	xa_for_each_start(&fs_info->extent_buffers, index, eb,
			  page_start >> fs_info->sectorsize_bits) {
		if (in_range(eb->start, page_start, PAGE_SIZE))
			return eb;
		else if (eb->start >= page_start + PAGE_SIZE)
	while (cur < page_start + PAGE_SIZE) {
		int ret;
		int i;

		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
				(void **)gang, cur >> fs_info->sectorsize_bits,
				min_t(unsigned int, GANG_LOOKUP_SIZE,
				      PAGE_SIZE / fs_info->nodesize));
		if (ret == 0)
			goto out;
		for (i = 0; i < ret; i++) {
			/* Already beyond page end */
			return NULL;
			if (gang[i]->start >= page_start + PAGE_SIZE)
				goto out;
			/* Found one */
			if (gang[i]->start >= bytenr) {
				found = gang[i];
				goto out;
			}
	return NULL;
		}
		cur = gang[ret - 1]->start + gang[ret - 1]->len;
	}
out:
	return found;
}

static int try_release_subpage_extent_buffer(struct page *page)
+19 −3
Original line number Diff line number Diff line
@@ -150,8 +150,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)

void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
	unsigned long index;
	struct extent_buffer *eb;
	struct radix_tree_iter iter;
	void **slot;
	struct btrfs_device *dev, *tmp;

	if (!fs_info)
@@ -163,9 +163,25 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)

	test_mnt->mnt_sb->s_fs_info = NULL;

	xa_for_each(&fs_info->extent_buffers, index, eb) {
	spin_lock(&fs_info->buffer_lock);
	radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
		struct extent_buffer *eb;

		eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
		if (!eb)
			continue;
		/* Shouldn't happen but that kind of thinking creates CVE's */
		if (radix_tree_exception(eb)) {
			if (radix_tree_deref_retry(eb))
				slot = radix_tree_iter_retry(&iter);
			continue;
		}
		slot = radix_tree_iter_resume(slot, &iter);
		spin_unlock(&fs_info->buffer_lock);
		free_extent_buffer_stale(eb);
		spin_lock(&fs_info->buffer_lock);
	}
	spin_unlock(&fs_info->buffer_lock);

	btrfs_mapping_tree_free(&fs_info->mapping_tree);
	list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,