Commit 6f3952cb authored by Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "This brings updates of space handling, performance improvements and bug
  fixes. The subpage block size and zoned mode features have reached a
  state where they're usable but with limitations.

  Performance or related:

   - do not block on deleted block group mutex in the cleaner, avoids
     some long stalls

   - improved flushing: make it work better with ticket space
     reservations and avoid excessive transaction commits in some
     scenarios, slightly improves throughput for random write load

   - preemptive background flushing: separate the logic from ticket
     reservations, improve the accounting and decisions when to flush in
     low space conditions

   - less lock contention related to running delayed refs, let just one
     thread do the flushing when there are many inside transaction
     commit

   - dbench workload improvements: avoid unnecessary work when logging
     inodes, fewer fallbacks to transaction commit and thus less waiting
     for it (+7% throughput, -20% latency)

  Core:

   - subpage block size
      - currently read-only support
      - refactor and generalize code where sectorsize is assumed to be
        page size, add the subpage handling everywhere
      - the read-write support is on the way, page sizes are still
        limited to 4K or 64K

   - zoned mode, first working version but with limitations
      - SMR/ZBC/ZNS friendly allocation mode, utilizing the "no fixed
        location for structures" and chunked allocation
      - superblock as the only fixed data structure needs special
        handling, uses 2 consecutive zones as a ring buffer
      - tree-log support with a dedicated block group to avoid unordered
        writes
      - emulated zones on non-zoned devices
      - not yet working
         - all non-single block group profiles, requires more zone write
           pointer synchronization between the multiple block groups
         - fitrim due to dependency on space cache, can be implemented

  Fixes:

   - ref-verify: proper tree owner and node level tracking

   - fix pinned byte accounting, causing some early ENOSPC now more
     likely due to other changes in delayed refs

  Other:

   - error handling fixes and improvements

   - more error injection points

   - more function documentation

   - more and updated tracepoints

   - subset of W=1 checked by default

   - update comments to allow more automatic kdoc parameter checks"

* tag 'for-5.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (144 commits)
  btrfs: zoned: enable to mount ZONED incompat flag
  btrfs: zoned: deal with holes writing out tree-log pages
  btrfs: zoned: reorder log node allocation on zoned filesystem
  btrfs: zoned: serialize log transaction on zoned filesystems
  btrfs: zoned: extend zoned allocator to use dedicated tree-log block group
  btrfs: split alloc_log_tree()
  btrfs: zoned: relocate block group to repair IO failure in zoned filesystems
  btrfs: zoned: enable relocation on a zoned filesystem
  btrfs: zoned: support dev-replace in zoned filesystems
  btrfs: zoned: implement copying for zoned device-replace
  btrfs: zoned: implement cloning for zoned device-replace
  btrfs: zoned: mark block groups to copy for device-replace
  btrfs: zoned: do not use async metadata checksum on zoned filesystems
  btrfs: zoned: wait for existing extents before truncating
  btrfs: zoned: serialize metadata IO
  btrfs: zoned: introduce dedicated data write path for zoned filesystems
  btrfs: zoned: enable zone append writing for direct IO
  btrfs: zoned: use ZONE_APPEND write for zoned mode
  btrfs: save irq flags when looking up an ordered extent
  btrfs: zoned: cache if block group is on a sequential zone
  ...
parents f9d58de2 9d294a68
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -851,6 +851,39 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio,
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 * bio_add_zone_append_page - attempt to add page to zone-append bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Try to append @page to the bio_vec list of @bio, where @bio is going to be
 * submitted as a zone-append request. The bio must already carry the
 * REQ_OP_ZONE_APPEND operation and its queue must belong to a zoned block
 * device; either violation triggers a one-time warning and the page is not
 * added. The amount of data the bio may hold is capped by the queue's maximum
 * zone-append size, so the call can also fail because the bio is full or
 * because of other limits of the target device. The target block device must
 * allow bio's up to PAGE_SIZE, so it is always possible to add a single page
 * to an empty bio.
 *
 * Returns: number of bytes added to the bio, or 0 in case of a failure.
 */
int bio_add_zone_append_page(struct bio *bio, struct page *page,
			     unsigned int len, unsigned int offset)
{
	struct request_queue *queue = bio->bi_disk->queue;
	unsigned int max_append_sectors;
	bool merged_same_page = false;

	/* Only bios that were set up for zone append may be filled here */
	if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
		return 0;

	/* Zone append makes no sense on a queue that is not zoned */
	if (WARN_ON_ONCE(!blk_queue_is_zoned(queue)))
		return 0;

	max_append_sectors = queue_max_zone_append_sectors(queue);

	return bio_add_hw_page(queue, bio, page, len, offset,
			       max_append_sectors, &merged_same_page);
}
EXPORT_SYMBOL_GPL(bio_add_zone_append_page);

/**
 * __bio_try_merge_page - try appending data to an existing bvec.
 * @bio: destination bio
+18 −1
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0

# Subset of W=1 warnings
subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
subdir-ccflags-y += -Wmissing-declarations
subdir-ccflags-y += -Wmissing-format-attribute
subdir-ccflags-y += -Wmissing-prototypes
subdir-ccflags-y += -Wold-style-definition
subdir-ccflags-y += -Wmissing-include-dirs
subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
subdir-ccflags-y += -Wno-missing-field-initializers
subdir-ccflags-y += -Wno-sign-compare
subdir-ccflags-y += -Wno-type-limits

obj-$(CONFIG_BTRFS_FS) := btrfs.o

btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
@@ -11,7 +27,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o
	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
	   subpage.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+8 −9
Original line number Diff line number Diff line
@@ -1501,7 +1501,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
}

/**
 * btrfs_check_shared - tell us whether an extent is shared
 * Check if an extent is shared or not
 *
 * @root:   root inode belongs to
 * @inum:   inode number of the inode whose extent we are checking
 * @bytenr: logical bytenr of the extent we are checking
 * @roots:  list of roots this extent is shared among
 * @tmp:    temporary list used for iteration
 *
 * btrfs_check_shared uses the backref walking code but will short
 * circuit as soon as it finds a root or inode that doesn't match the
@@ -2541,13 +2547,6 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
		list_del(&edge->list[UPPER]);
		btrfs_backref_free_edge(cache, edge);

		if (RB_EMPTY_NODE(&upper->rb_node)) {
			BUG_ON(!list_empty(&node->upper));
			btrfs_backref_drop_node(cache, node);
			node = upper;
			node->lowest = 1;
			continue;
		}
		/*
		 * Add the node to leaf node list if no other child block
		 * cached.
@@ -2624,7 +2623,7 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
		/* Only reloc backref cache cares about a specific root */
		if (cache->is_reloc) {
			root = find_reloc_root(cache->fs_info, cur->bytenr);
			if (WARN_ON(!root))
			if (!root)
				return -ENOENT;
			cur->root = root;
		} else {
+6 −3
Original line number Diff line number Diff line
@@ -296,6 +296,9 @@ static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
					   struct btrfs_backref_node *node)
{
	if (node) {
		ASSERT(list_empty(&node->list));
		ASSERT(list_empty(&node->lower));
		ASSERT(node->eb == NULL);
		cache->nr_nodes--;
		btrfs_put_root(node->root);
		kfree(node);
@@ -340,11 +343,11 @@ static inline void btrfs_backref_drop_node_buffer(
static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
					   struct btrfs_backref_node *node)
{
	BUG_ON(!list_empty(&node->upper));
	ASSERT(list_empty(&node->upper));

	btrfs_backref_drop_node_buffer(node);
	list_del(&node->list);
	list_del(&node->lower);
	list_del_init(&node->list);
	list_del_init(&node->lower);
	if (!RB_EMPTY_NODE(&node->rb_node))
		rb_erase(&node->rb_node, &tree->rb_root);
	btrfs_backref_free_node(tree, node);
+120 −58
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include "delalloc-space.h"
#include "discard.h"
#include "raid56.h"
#include "zoned.h"

/*
 * Return target flags in extended format or 0 if restripe for this chunk_type
@@ -724,6 +725,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
	struct btrfs_caching_control *caching_ctl = NULL;
	int ret = 0;

	/* Allocator for zoned filesystems does not use the cache at all */
	if (btrfs_is_zoned(fs_info))
		return 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;
@@ -896,6 +901,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	btrfs_clear_treelog_bg(block_group);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
@@ -1008,12 +1015,17 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
		WARN_ON(block_group->space_info->total_bytes
			< block_group->length);
		WARN_ON(block_group->space_info->bytes_readonly
			< block_group->length);
			< block_group->length - block_group->zone_unusable);
		WARN_ON(block_group->space_info->bytes_zone_unusable
			< block_group->zone_unusable);
		WARN_ON(block_group->space_info->disk_total
			< block_group->length * factor);
	}
	block_group->space_info->total_bytes -= block_group->length;
	block_group->space_info->bytes_readonly -= block_group->length;
	block_group->space_info->bytes_readonly -=
		(block_group->length - block_group->zone_unusable);
	block_group->space_info->bytes_zone_unusable -=
		block_group->zone_unusable;
	block_group->space_info->disk_total -= block_group->length * factor;

	spin_unlock(&block_group->space_info->lock);
@@ -1157,7 +1169,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
	}

	num_bytes = cache->length - cache->reserved - cache->pinned -
		    cache->bytes_super - cache->used;
		    cache->bytes_super - cache->zone_unusable - cache->used;

	/*
	 * Data never overcommits, even in mixed mode, so do just the straight
@@ -1188,6 +1200,12 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)

	if (!ret) {
		sinfo->bytes_readonly += num_bytes;
		if (btrfs_is_zoned(cache->fs_info)) {
			/* Migrate zone_unusable bytes to readonly */
			sinfo->bytes_readonly += cache->zone_unusable;
			sinfo->bytes_zone_unusable -= cache->zone_unusable;
			cache->zone_unusable = 0;
		}
		cache->ro++;
		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
	}
@@ -1262,6 +1280,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
		return;

	/*
	 * Long running balances can keep us blocked here for eternity, so
	 * simply skip deletion if we're unable to get the mutex.
	 */
	if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex))
		return;

	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->unused_bgs)) {
		int trimming;
@@ -1281,8 +1306,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)

		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);

		mutex_lock(&fs_info->delete_unused_bgs_mutex);

		/* Don't want to race with allocators so take the groups_sem */
		down_write(&space_info->groups_sem);

@@ -1371,9 +1394,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		btrfs_space_info_update_bytes_pinned(fs_info, space_info,
						     -block_group->pinned);
		space_info->bytes_readonly += block_group->pinned;
		percpu_counter_add_batch(&space_info->total_bytes_pinned,
				   -block_group->pinned,
				   BTRFS_TOTAL_BYTES_PINNED_BATCH);
		__btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
		block_group->pinned = 0;

		spin_unlock(&block_group->lock);
@@ -1389,8 +1410,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
			goto flip_async;

		/* DISCARD can flip during remount */
		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
		/*
		 * DISCARD can flip during remount. On zoned filesystems, we
		 * need to reset sequential-required zones.
		 */
		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
				btrfs_is_zoned(fs_info);

		/* Implicit trim during transaction commit. */
		if (trimming)
@@ -1428,11 +1453,11 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
		btrfs_end_transaction(trans);
next:
		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
		btrfs_put_block_group(block_group);
		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
	return;

flip_async:
@@ -1561,8 +1586,11 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}

/**
 * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
 * Map a physical disk address to a list of logical addresses
 *
 * @fs_info:       the filesystem
 * @chunk_start:   logical address of block group
 * @bdev:	   physical device to resolve, can be NULL to indicate any device
 * @physical:	   physical address to map to logical addresses
 * @logical:	   return array of logical addresses which map to @physical
 * @naddrs:	   length of @logical
@@ -1572,9 +1600,9 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 * Used primarily to exclude those portions of a block group that contain super
 * block copies.
 */
EXPORT_FOR_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
		     u64 physical, u64 **logical, int *naddrs, int *stripe_len)
		     struct block_device *bdev, u64 physical, u64 **logical,
		     int *naddrs, int *stripe_len)
{
	struct extent_map *em;
	struct map_lookup *map;
@@ -1592,6 +1620,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
	map = em->map_lookup;
	data_stripe_length = em->orig_block_len;
	io_stripe_size = map->stripe_len;
	chunk_start = em->start;

	/* For RAID5/6 adjust to a full IO stripe length */
	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -1606,14 +1635,18 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
	for (i = 0; i < map->num_stripes; i++) {
		bool already_inserted = false;
		u64 stripe_nr;
		u64 offset;
		int j;

		if (!in_range(physical, map->stripes[i].physical,
			      data_stripe_length))
			continue;

		if (bdev && map->stripes[i].dev->bdev != bdev)
			continue;

		stripe_nr = physical - map->stripes[i].physical;
		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
		stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);

		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
			stripe_nr = stripe_nr * map->num_stripes + i;
@@ -1627,7 +1660,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
		 * instead of map->stripe_len
		 */

		bytenr = chunk_start + stripe_nr * io_stripe_size;
		bytenr = chunk_start + stripe_nr * io_stripe_size + offset;

		/* Ensure we don't add duplicate addresses */
		for (j = 0; j < nr; j++) {
@@ -1669,7 +1702,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(fs_info, cache->start,
		ret = btrfs_rmap_block(fs_info, cache->start, NULL,
				       bytenr, &logical, &nr, &stripe_len);
		if (ret)
			return ret;
@@ -1805,24 +1838,8 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
	return ret;
}

static void read_block_group_item(struct btrfs_block_group *cache,
				 struct btrfs_path *path,
				 const struct btrfs_key *key)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_block_group_item bgi;
	int slot = path->slots[0];

	cache->length = key->offset;

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
	cache->used = btrfs_stack_block_group_used(&bgi);
	cache->flags = btrfs_stack_block_group_flags(&bgi);
}

static int read_one_block_group(struct btrfs_fs_info *info,
				struct btrfs_path *path,
				struct btrfs_block_group_item *bgi,
				const struct btrfs_key *key,
				int need_clear)
{
@@ -1837,7 +1854,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
	if (!cache)
		return -ENOMEM;

	read_block_group_item(cache, path, key);
	cache->length = key->offset;
	cache->used = btrfs_stack_block_group_used(bgi);
	cache->flags = btrfs_stack_block_group_flags(bgi);

	set_free_space_tree_thresholds(cache);

@@ -1864,6 +1883,13 @@ static int read_one_block_group(struct btrfs_fs_info *info,
			goto error;
	}

	ret = btrfs_load_block_group_zone_info(cache, false);
	if (ret) {
		btrfs_err(info, "zoned: failed to load zone info of bg %llu",
			  cache->start);
		goto error;
	}

	/*
	 * We need to exclude the super stripes now so that the space info has
	 * super bytes accounted for, otherwise we'll think we have more space
@@ -1877,12 +1903,20 @@ static int read_one_block_group(struct btrfs_fs_info *info,
	}

	/*
	 * Check for two cases, either we are full, and therefore don't need
	 * to bother with the caching work since we won't find any space, or we
	 * are empty, and we can just add all the space in and be done with it.
	 * This saves us _a_lot_ of time, particularly in the full case.
	 */
	if (cache->length == cache->used) {
	 * For zoned filesystem, space after the allocation offset is the only
	 * free space for a block group. So, we don't need any caching work.
	 * btrfs_calc_zone_unusable() will set the amount of free space and
	 * zone_unusable space.
	 *
	 * For regular filesystem, check for two cases, either we are full, and
	 * therefore don't need to bother with the caching work since we won't
	 * find any space, or we are empty, and we can just add all the space
	 * in and be done with it.  This saves us _a_lot_ of time, particularly
	 * in the full case.
	 */
	if (btrfs_is_zoned(info)) {
		btrfs_calc_zone_unusable(cache);
	} else if (cache->length == cache->used) {
		cache->last_byte_to_unpin = (u64)-1;
		cache->cached = BTRFS_CACHE_FINISHED;
		btrfs_free_excluded_extents(cache);
@@ -1901,7 +1935,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
	}
	trace_btrfs_add_block_group(info, cache, 0);
	btrfs_update_space_info(info, cache->flags, cache->length,
				cache->used, cache->bytes_super, &space_info);
				cache->used, cache->bytes_super,
				cache->zone_unusable, &space_info);

	cache->space_info = space_info;

@@ -1957,7 +1992,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
			break;
		}
		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
					0, &space_info);
					0, 0, &space_info);
		bg->space_info = space_info;
		link_block_group(bg);

@@ -1996,19 +2031,29 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
		need_clear = 1;

	while (1) {
		struct btrfs_block_group_item bgi;
		struct extent_buffer *leaf;
		int slot;

		ret = find_first_block_group(info, path, &key);
		if (ret > 0)
			break;
		if (ret != 0)
			goto error;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		ret = read_one_block_group(info, path, &key, need_clear);
		leaf = path->nodes[0];
		slot = path->slots[0];

		read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
				   sizeof(bgi));

		btrfs_item_key_to_cpu(leaf, &key, slot);
		btrfs_release_path(path);
		ret = read_one_block_group(info, &bgi, &key, need_clear);
		if (ret < 0)
			goto error;
		key.objectid += key.offset;
		key.offset = 0;
		btrfs_release_path(path);
	}
	btrfs_release_path(path);

@@ -2140,6 +2185,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
	cache->cached = BTRFS_CACHE_FINISHED;
	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		cache->needs_free_space = 1;

	ret = btrfs_load_block_group_zone_info(cache, true);
	if (ret) {
		btrfs_put_block_group(cache);
		return ret;
	}

	ret = exclude_super_stripes(cache);
	if (ret) {
		/* We may have excluded something, so call this just in case */
@@ -2181,7 +2233,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
	 */
	trace_btrfs_add_block_group(fs_info, cache, 1);
	btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
				cache->bytes_super, &cache->space_info);
				cache->bytes_super, 0, &cache->space_info);
	btrfs_update_global_block_rsv(fs_info);

	link_block_group(cache);
@@ -2289,8 +2341,15 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
	spin_lock(&cache->lock);
	if (!--cache->ro) {
		num_bytes = cache->length - cache->reserved -
			    cache->pinned - cache->bytes_super - cache->used;
			    cache->pinned - cache->bytes_super -
			    cache->zone_unusable - cache->used;
		sinfo->bytes_readonly -= num_bytes;
		if (btrfs_is_zoned(cache->fs_info)) {
			/* Migrate zone_unusable bytes back */
			cache->zone_unusable = cache->alloc_offset - cache->used;
			sinfo->bytes_zone_unusable += cache->zone_unusable;
			sinfo->bytes_readonly -= cache->zone_unusable;
		}
		list_del_init(&cache->ro_list);
	}
	spin_unlock(&cache->lock);
@@ -2564,8 +2623,10 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
		if (!path) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/*
@@ -2659,16 +2720,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
			btrfs_put_block_group(cache);
		if (drop_reserve)
			btrfs_delayed_refs_rsv_release(fs_info, 1);

		if (ret)
			break;

		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
		mutex_unlock(&trans->transaction->cache_write_mutex);
		if (ret)
			goto out;
		mutex_lock(&trans->transaction->cache_write_mutex);
	}
	mutex_unlock(&trans->transaction->cache_write_mutex);
@@ -2692,7 +2751,12 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
			goto again;
		}
		spin_unlock(&cur_trans->dirty_bgs_lock);
	} else if (ret < 0) {
	}
out:
	if (ret < 0) {
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_splice_init(&dirty, &cur_trans->dirty_bgs);
		spin_unlock(&cur_trans->dirty_bgs_lock);
		btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
	}

@@ -2896,10 +2960,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			percpu_counter_add_batch(
					&cache->space_info->total_bytes_pinned,
					num_bytes,
					BTRFS_TOTAL_BYTES_PINNED_BATCH);
			__btrfs_mod_total_bytes_pinned(cache->space_info,
						       num_bytes);
			set_extent_dirty(&trans->transaction->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
Loading