Commit f3cdc8ae authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "Highlights:

   - speedup dead root detection during orphan cleanup, eg. when there
     are many deleted subvolumes waiting to be cleaned, the trees are
     now looked up in radix tree instead of a O(N^2) search

   - snapshot creation with inherited qgroup will mark the qgroup
     inconsistent, requires a rescan

   - send will emit file capabilities after chown, this produces a
     stream that does not need postprocessing to set the capabilities
     again

   - direct io ported to iomap infrastructure, cleaned up and simplified
     code, notably removing last use of struct buffer_head in btrfs code

  Core changes:

   - factor out backreference iteration, to be used by ordinary
     backreferences and relocation code

   - improved global block reserve utilization
      * better logic to serialize requests
      * increased maximum available for unlink
      * improved handling on large pages (64K)

   - direct io cleanups and fixes
      * simplify layering, where cloned bios were unnecessarily created
        for some cases
      * error handling fixes (submit, endio)
      * remove repair worker thread, used to avoid deadlocks during
        repair

   - refactored block group reading code, preparatory work for new type
     of block group storage that should improve mount time on large
     filesystems

  Cleanups:

   - cleaned up (and slightly sped up) set/get helpers for metadata data
     structure members

   - root bit REF_COWS got renamed to SHAREABLE to reflect the that the
     blocks of the tree get shared either among subvolumes or with the
     relocation trees

  Fixes:

   - when subvolume deletion fails due to ENOSPC, the filesystem is not
     turned read-only

   - device scan deals with devices from other filesystems that changed
     ownership due to overwrite (mkfs)

   - fix a race between scrub and block group removal/allocation

   - fix long standing bug of a runaway balance operation, printing the
     same line to the syslog, caused by a stale status bit on a reloc
     tree that prevented progress

   - fix corrupt log due to concurrent fsync of inodes with shared
     extents

   - fix space underflow for NODATACOW and buffered writes when it for
     some reason needs to fallback to COW mode"

* tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits)
  btrfs: fix space_info bytes_may_use underflow during space cache writeout
  btrfs: fix space_info bytes_may_use underflow after nocow buffered write
  btrfs: fix wrong file range cleanup after an error filling dealloc range
  btrfs: remove redundant local variable in read_block_for_search
  btrfs: open code key_search
  btrfs: split btrfs_direct_IO to read and write part
  btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK
  fs: remove dio_end_io()
  btrfs: switch to iomap_dio_rw() for dio
  iomap: remove lockdep_assert_held()
  iomap: add a filesystem hook for direct I/O bio submission
  fs: export generic_file_buffered_read()
  btrfs: turn space cache writeout failure messages into debug messages
  btrfs: include error on messages about failure to write space/inode caches
  btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks()
  btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
  btrfs: make checksum item extension more efficient
  btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents
  btrfs: unexport btrfs_compress_set_level()
  btrfs: simplify iget helpers
  ...
parents 8eeae5ba 2166e5ed
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ ForEachMacros:
  - 'ax25_uid_for_each'
  - '__bio_for_each_bvec'
  - 'bio_for_each_bvec'
  - 'bio_for_each_bvec_all'
  - 'bio_for_each_integrity_vec'
  - '__bio_for_each_segment'
  - 'bio_for_each_segment'
+2 −0
Original line number Diff line number Diff line
@@ -129,6 +129,7 @@ Usage of helpers:
::

	bio_for_each_segment_all()
	bio_for_each_bvec_all()
	bio_first_bvec_all()
	bio_first_page_all()
	bio_last_bvec_all()
@@ -143,4 +144,5 @@ Usage of helpers:
  bio_vec' will contain a multi-page IO vector during the iteration::

	bio_for_each_bvec()
	bio_for_each_bvec_all()
	rq_for_each_bvec()
+1 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ config BTRFS_FS
	select LZO_DECOMPRESS
	select ZSTD_COMPRESS
	select ZSTD_DECOMPRESS
	select FS_IOMAP
	select RAID6_PQ
	select XOR_BLOCKS
	select SRCU
+831 −6
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"

/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@@ -537,18 +538,13 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				const u64 *extent_item_pos, bool ignore_offset)
{
	struct btrfs_root *root;
	struct btrfs_key root_key;
	struct extent_buffer *eb;
	int ret = 0;
	int root_level;
	int level = ref->level;
	struct btrfs_key search_key = ref->key_for_search;

	root_key.objectid = ref->root_id;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;

	root = btrfs_get_fs_root(fs_info, &root_key, false);
	root = btrfs_get_fs_root(fs_info, ref->root_id, false);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out_free;
@@ -2295,3 +2291,832 @@ void free_ipath(struct inode_fs_paths *ipath)
	kvfree(ipath->fspath);
	kfree(ipath);
}

struct btrfs_backref_iter *btrfs_backref_iter_alloc(
		struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
{
	struct btrfs_backref_iter *ret;

	ret = kzalloc(sizeof(*ret), gfp_flag);
	if (!ret)
		return NULL;

	ret->path = btrfs_alloc_path();
	if (!ret) {
		kfree(ret);
		return NULL;
	}

	/* Current backref iterator only supports iteration in commit root */
	ret->path->search_commit_root = 1;
	ret->path->skip_locking = 1;
	ret->fs_info = fs_info;

	return ret;
}

int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
{
	struct btrfs_fs_info *fs_info = iter->fs_info;
	struct btrfs_path *path = iter->path;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	key.type = BTRFS_METADATA_ITEM_KEY;
	key.offset = (u64)-1;
	iter->bytenr = bytenr;

	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret == 0) {
		ret = -EUCLEAN;
		goto release;
	}
	if (path->slots[0] == 0) {
		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
		ret = -EUCLEAN;
		goto release;
	}
	path->slots[0]--;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
	     key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
		ret = -ENOENT;
		goto release;
	}
	memcpy(&iter->cur_key, &key, sizeof(key));
	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
						    path->slots[0]);
	iter->end_ptr = (u32)(iter->item_ptr +
			btrfs_item_size_nr(path->nodes[0], path->slots[0]));
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_extent_item);

	/*
	 * Only support iteration on tree backref yet.
	 *
	 * This is an extra precaution for non skinny-metadata, where
	 * EXTENT_ITEM is also used for tree blocks, that we can only use
	 * extent flags to determine if it's a tree block.
	 */
	if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
		ret = -ENOTSUPP;
		goto release;
	}
	iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));

	/* If there is no inline backref, go search for keyed backref */
	if (iter->cur_ptr >= iter->end_ptr) {
		ret = btrfs_next_item(fs_info->extent_root, path);

		/* No inline nor keyed ref */
		if (ret > 0) {
			ret = -ENOENT;
			goto release;
		}
		if (ret < 0)
			goto release;

		btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
				path->slots[0]);
		if (iter->cur_key.objectid != bytenr ||
		    (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
		     iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
			ret = -ENOENT;
			goto release;
		}
		iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
							   path->slots[0]);
		iter->item_ptr = iter->cur_ptr;
		iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
				      path->nodes[0], path->slots[0]));
	}

	return 0;
release:
	btrfs_backref_iter_release(iter);
	return ret;
}

/*
 * Go to the next backref item of current bytenr, can be either inlined or
 * keyed.
 *
 * Caller needs to check whether it's inline ref or not by iter->cur_key.
 *
 * Return 0 if we get next backref without problem.
 * Return >0 if there is no extra backref for this bytenr.
 * Return <0 if there is something wrong happened.
 */
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
{
	struct extent_buffer *eb = btrfs_backref_get_eb(iter);
	struct btrfs_path *path = iter->path;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	u32 size;

	if (btrfs_backref_iter_is_inline_ref(iter)) {
		/* We're still inside the inline refs */
		ASSERT(iter->cur_ptr < iter->end_ptr);

		if (btrfs_backref_has_tree_block_info(iter)) {
			/* First tree block info */
			size = sizeof(struct btrfs_tree_block_info);
		} else {
			/* Use inline ref type to determine the size */
			int type;

			iref = (struct btrfs_extent_inline_ref *)
				((unsigned long)iter->cur_ptr);
			type = btrfs_extent_inline_ref_type(eb, iref);

			size = btrfs_extent_inline_ref_size(type);
		}
		iter->cur_ptr += size;
		if (iter->cur_ptr < iter->end_ptr)
			return 0;

		/* All inline items iterated, fall through */
	}

	/* We're at keyed items, there is no inline item, go to the next one */
	ret = btrfs_next_item(iter->fs_info->extent_root, iter->path);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
	if (iter->cur_key.objectid != iter->bytenr ||
	    (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	     iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
		return 1;
	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
					path->slots[0]);
	iter->cur_ptr = iter->item_ptr;
	iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0],
						path->slots[0]);
	return 0;
}

void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
			      struct btrfs_backref_cache *cache, int is_reloc)
{
	int i;

	cache->rb_root = RB_ROOT;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&cache->pending[i]);
	INIT_LIST_HEAD(&cache->changed);
	INIT_LIST_HEAD(&cache->detached);
	INIT_LIST_HEAD(&cache->leaves);
	INIT_LIST_HEAD(&cache->pending_edge);
	INIT_LIST_HEAD(&cache->useless_node);
	cache->fs_info = fs_info;
	cache->is_reloc = is_reloc;
}

struct btrfs_backref_node *btrfs_backref_alloc_node(
		struct btrfs_backref_cache *cache, u64 bytenr, int level)
{
	struct btrfs_backref_node *node;

	ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
	node = kzalloc(sizeof(*node), GFP_NOFS);
	if (!node)
		return node;

	INIT_LIST_HEAD(&node->list);
	INIT_LIST_HEAD(&node->upper);
	INIT_LIST_HEAD(&node->lower);
	RB_CLEAR_NODE(&node->rb_node);
	cache->nr_nodes++;
	node->level = level;
	node->bytenr = bytenr;

	return node;
}

struct btrfs_backref_edge *btrfs_backref_alloc_edge(
		struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_edge *edge;

	edge = kzalloc(sizeof(*edge), GFP_NOFS);
	if (edge)
		cache->nr_edges++;
	return edge;
}

/*
 * Drop the backref node from cache, also cleaning up all its
 * upper edges and any uncached nodes in the path.
 *
 * This cleanup happens bottom up, thus the node should either
 * be the lowest node in the cache or a detached node.
 */
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;

	if (!node)
		return;

	BUG_ON(!node->lowest && !node->detached);
	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next, struct btrfs_backref_edge,
				  list[LOWER]);
		upper = edge->node[UPPER];
		list_del(&edge->list[LOWER]);
		list_del(&edge->list[UPPER]);
		btrfs_backref_free_edge(cache, edge);

		if (RB_EMPTY_NODE(&upper->rb_node)) {
			BUG_ON(!list_empty(&node->upper));
			btrfs_backref_drop_node(cache, node);
			node = upper;
			node->lowest = 1;
			continue;
		}
		/*
		 * Add the node to leaf node list if no other child block
		 * cached.
		 */
		if (list_empty(&upper->lower)) {
			list_add_tail(&upper->lower, &cache->leaves);
			upper->lowest = 1;
		}
	}

	btrfs_backref_drop_node(cache, node);
}

/*
 * Release all nodes/edges from current cache
 */
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_node *node;
	int i;

	while (!list_empty(&cache->detached)) {
		node = list_entry(cache->detached.next,
				  struct btrfs_backref_node, list);
		btrfs_backref_cleanup_node(cache, node);
	}

	while (!list_empty(&cache->leaves)) {
		node = list_entry(cache->leaves.next,
				  struct btrfs_backref_node, lower);
		btrfs_backref_cleanup_node(cache, node);
	}

	cache->last_trans = 0;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
		ASSERT(list_empty(&cache->pending[i]));
	ASSERT(list_empty(&cache->pending_edge));
	ASSERT(list_empty(&cache->useless_node));
	ASSERT(list_empty(&cache->changed));
	ASSERT(list_empty(&cache->detached));
	ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
	ASSERT(!cache->nr_nodes);
	ASSERT(!cache->nr_edges);
}

/*
 * Handle direct tree backref
 *
 * Direct tree backref means, the backref item shows its parent bytenr
 * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
 *
 * @ref_key:	The converted backref key.
 *		For keyed backref, it's the item key.
 *		For inlined backref, objectid is the bytenr,
 *		type is btrfs_inline_ref_type, offset is
 *		btrfs_inline_ref_offset.
 */
static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
				      struct btrfs_key *ref_key,
				      struct btrfs_backref_node *cur)
{
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_node *upper;
	struct rb_node *rb_node;

	ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);

	/* Only reloc root uses backref pointing to itself */
	if (ref_key->objectid == ref_key->offset) {
		struct btrfs_root *root;

		cur->is_reloc_root = 1;
		/* Only reloc backref cache cares about a specific root */
		if (cache->is_reloc) {
			root = find_reloc_root(cache->fs_info, cur->bytenr);
			if (WARN_ON(!root))
				return -ENOENT;
			cur->root = root;
		} else {
			/*
			 * For generic purpose backref cache, reloc root node
			 * is useless.
			 */
			list_add(&cur->list, &cache->useless_node);
		}
		return 0;
	}

	edge = btrfs_backref_alloc_edge(cache);
	if (!edge)
		return -ENOMEM;

	rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
	if (!rb_node) {
		/* Parent node not yet cached */
		upper = btrfs_backref_alloc_node(cache, ref_key->offset,
					   cur->level + 1);
		if (!upper) {
			btrfs_backref_free_edge(cache, edge);
			return -ENOMEM;
		}

		/*
		 *  Backrefs for the upper level block isn't cached, add the
		 *  block to pending list
		 */
		list_add_tail(&edge->list[UPPER], &cache->pending_edge);
	} else {
		/* Parent node already cached */
		upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
		ASSERT(upper->checked);
		INIT_LIST_HEAD(&edge->list[UPPER]);
	}
	btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
	return 0;
}

/*
 * Handle indirect tree backref
 *
 * Indirect tree backref means, we only know which tree the node belongs to.
 * We still need to do a tree search to find out the parents. This is for
 * TREE_BLOCK_REF backref (keyed or inlined).
 *
 * @ref_key:	The same as @ref_key in  handle_direct_tree_backref()
 * @tree_key:	The first key of this tree block.
 * @path:	A clean (released) path, to avoid allocating path everytime
 *		the function get called.
 */
static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
					struct btrfs_path *path,
					struct btrfs_key *ref_key,
					struct btrfs_key *tree_key,
					struct btrfs_backref_node *cur)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_backref_node *upper;
	struct btrfs_backref_node *lower;
	struct btrfs_backref_edge *edge;
	struct extent_buffer *eb;
	struct btrfs_root *root;
	struct rb_node *rb_node;
	int level;
	bool need_check = true;
	int ret;

	root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
	if (IS_ERR(root))
		return PTR_ERR(root);
	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		cur->cowonly = 1;

	if (btrfs_root_level(&root->root_item) == cur->level) {
		/* Tree root */
		ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
		/*
		 * For reloc backref cache, we may ignore reloc root.  But for
		 * general purpose backref cache, we can't rely on
		 * btrfs_should_ignore_reloc_root() as it may conflict with
		 * current running relocation and lead to missing root.
		 *
		 * For general purpose backref cache, reloc root detection is
		 * completely relying on direct backref (key->offset is parent
		 * bytenr), thus only do such check for reloc cache.
		 */
		if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
			btrfs_put_root(root);
			list_add(&cur->list, &cache->useless_node);
		} else {
			cur->root = root;
		}
		return 0;
	}

	level = cur->level + 1;

	/* Search the tree to find parent blocks referring to the block */
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->lowest_level = level;
	ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
	path->lowest_level = 0;
	if (ret < 0) {
		btrfs_put_root(root);
		return ret;
	}
	if (ret > 0 && path->slots[level] > 0)
		path->slots[level]--;

	eb = path->nodes[level];
	if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
		btrfs_err(fs_info,
"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
			  cur->bytenr, level - 1, root->root_key.objectid,
			  tree_key->objectid, tree_key->type, tree_key->offset);
		btrfs_put_root(root);
		ret = -ENOENT;
		goto out;
	}
	lower = cur;

	/* Add all nodes and edges in the path */
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level]) {
			ASSERT(btrfs_root_bytenr(&root->root_item) ==
			       lower->bytenr);
			/* Same as previous should_ignore_reloc_root() call */
			if (btrfs_should_ignore_reloc_root(root) &&
			    cache->is_reloc) {
				btrfs_put_root(root);
				list_add(&lower->list, &cache->useless_node);
			} else {
				lower->root = root;
			}
			break;
		}

		edge = btrfs_backref_alloc_edge(cache);
		if (!edge) {
			btrfs_put_root(root);
			ret = -ENOMEM;
			goto out;
		}

		eb = path->nodes[level];
		rb_node = rb_simple_search(&cache->rb_root, eb->start);
		if (!rb_node) {
			upper = btrfs_backref_alloc_node(cache, eb->start,
							 lower->level + 1);
			if (!upper) {
				btrfs_put_root(root);
				btrfs_backref_free_edge(cache, edge);
				ret = -ENOMEM;
				goto out;
			}
			upper->owner = btrfs_header_owner(eb);
			if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
				upper->cowonly = 1;

			/*
			 * If we know the block isn't shared we can avoid
			 * checking its backrefs.
			 */
			if (btrfs_block_can_be_shared(root, eb))
				upper->checked = 0;
			else
				upper->checked = 1;

			/*
			 * Add the block to pending list if we need to check its
			 * backrefs, we only do this once while walking up a
			 * tree as we will catch anything else later on.
			 */
			if (!upper->checked && need_check) {
				need_check = false;
				list_add_tail(&edge->list[UPPER],
					      &cache->pending_edge);
			} else {
				if (upper->checked)
					need_check = true;
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
		} else {
			upper = rb_entry(rb_node, struct btrfs_backref_node,
					 rb_node);
			ASSERT(upper->checked);
			INIT_LIST_HEAD(&edge->list[UPPER]);
			if (!upper->owner)
				upper->owner = btrfs_header_owner(eb);
		}
		btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);

		if (rb_node) {
			btrfs_put_root(root);
			break;
		}
		lower = upper;
		upper = NULL;
	}
out:
	btrfs_release_path(path);
	return ret;
}

/*
 * Add backref node @cur into @cache.
 *
 * NOTE: Even if the function returned 0, @cur is not yet cached as its upper
 *	 links aren't yet bi-directional. Needs to finish such links.
 *	 Use btrfs_backref_finish_upper_links() to finish such linkage.
 *
 * @path:	Released path for indirect tree backref lookup
 * @iter:	Released backref iter for extent tree search
 * @node_key:	The first key of the tree block
 */
int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
				struct btrfs_path *path,
				struct btrfs_backref_iter *iter,
				struct btrfs_key *node_key,
				struct btrfs_backref_node *cur)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_node *exist;
	int ret;

	ret = btrfs_backref_iter_start(iter, cur->bytenr);
	if (ret < 0)
		return ret;
	/*
	 * We skip the first btrfs_tree_block_info, as we don't use the key
	 * stored in it, but fetch it from the tree block
	 */
	if (btrfs_backref_has_tree_block_info(iter)) {
		ret = btrfs_backref_iter_next(iter);
		if (ret < 0)
			goto out;
		/* No extra backref? This means the tree block is corrupted */
		if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	WARN_ON(cur->checked);
	if (!list_empty(&cur->upper)) {
		/*
		 * The backref was added previously when processing backref of
		 * type BTRFS_TREE_BLOCK_REF_KEY
		 */
		ASSERT(list_is_singular(&cur->upper));
		edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
				  list[LOWER]);
		ASSERT(list_empty(&edge->list[UPPER]));
		exist = edge->node[UPPER];
		/*
		 * Add the upper level block to pending list if we need check
		 * its backrefs
		 */
		if (!exist->checked)
			list_add_tail(&edge->list[UPPER], &cache->pending_edge);
	} else {
		exist = NULL;
	}

	for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
		struct extent_buffer *eb;
		struct btrfs_key key;
		int type;

		cond_resched();
		eb = btrfs_backref_get_eb(iter);

		key.objectid = iter->bytenr;
		if (btrfs_backref_iter_is_inline_ref(iter)) {
			struct btrfs_extent_inline_ref *iref;

			/* Update key for inline backref */
			iref = (struct btrfs_extent_inline_ref *)
				((unsigned long)iter->cur_ptr);
			type = btrfs_get_extent_inline_ref_type(eb, iref,
							BTRFS_REF_TYPE_BLOCK);
			if (type == BTRFS_REF_TYPE_INVALID) {
				ret = -EUCLEAN;
				goto out;
			}
			key.type = type;
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
		} else {
			key.type = iter->cur_key.type;
			key.offset = iter->cur_key.offset;
		}

		/*
		 * Parent node found and matches current inline ref, no need to
		 * rebuild this node for this inline ref
		 */
		if (exist &&
		    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
		      exist->owner == key.offset) ||
		     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
		      exist->bytenr == key.offset))) {
			exist = NULL;
			continue;
		}

		/* SHARED_BLOCK_REF means key.offset is the parent bytenr */
		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
			ret = handle_direct_tree_backref(cache, &key, cur);
			if (ret < 0)
				goto out;
			continue;
		} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
			ret = -EINVAL;
			btrfs_print_v0_err(fs_info);
			btrfs_handle_fs_error(fs_info, ret, NULL);
			goto out;
		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
			continue;
		}

		/*
		 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
		 * means the root objectid. We need to search the tree to get
		 * its parent bytenr.
		 */
		ret = handle_indirect_tree_backref(cache, path, &key, node_key,
						   cur);
		if (ret < 0)
			goto out;
	}
	ret = 0;
	cur->checked = 1;
	WARN_ON(exist);
out:
	btrfs_backref_iter_release(iter);
	return ret;
}

/*
 * Finish the upwards linkage created by btrfs_backref_add_tree_node()
 */
int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
				     struct btrfs_backref_node *start)
{
	struct list_head *useless_node = &cache->useless_node;
	struct btrfs_backref_edge *edge;
	struct rb_node *rb_node;
	LIST_HEAD(pending_edge);

	ASSERT(start->checked);

	/* Insert this node to cache if it's not COW-only */
	if (!start->cowonly) {
		rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
					   &start->rb_node);
		if (rb_node)
			btrfs_backref_panic(cache->fs_info, start->bytenr,
					    -EEXIST);
		list_add_tail(&start->lower, &cache->leaves);
	}

	/*
	 * Use breadth first search to iterate all related edges.
	 *
	 * The starting points are all the edges of this node
	 */
	list_for_each_entry(edge, &start->upper, list[LOWER])
		list_add_tail(&edge->list[UPPER], &pending_edge);

	while (!list_empty(&pending_edge)) {
		struct btrfs_backref_node *upper;
		struct btrfs_backref_node *lower;
		struct rb_node *rb_node;

		edge = list_first_entry(&pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		upper = edge->node[UPPER];
		lower = edge->node[LOWER];

		/* Parent is detached, no need to keep any edges */
		if (upper->detached) {
			list_del(&edge->list[LOWER]);
			btrfs_backref_free_edge(cache, edge);

			/* Lower node is orphan, queue for cleanup */
			if (list_empty(&lower->upper))
				list_add(&lower->list, useless_node);
			continue;
		}

		/*
		 * All new nodes added in current build_backref_tree() haven't
		 * been linked to the cache rb tree.
		 * So if we have upper->rb_node populated, this means a cache
		 * hit. We only need to link the edge, as @upper and all its
		 * parents have already been linked.
		 */
		if (!RB_EMPTY_NODE(&upper->rb_node)) {
			if (upper->lowest) {
				list_del_init(&upper->lower);
				upper->lowest = 0;
			}

			list_add_tail(&edge->list[UPPER], &upper->lower);
			continue;
		}

		/* Sanity check, we shouldn't have any unchecked nodes */
		if (!upper->checked) {
			ASSERT(0);
			return -EUCLEAN;
		}

		/* Sanity check, COW-only node has non-COW-only parent */
		if (start->cowonly != upper->cowonly) {
			ASSERT(0);
			return -EUCLEAN;
		}

		/* Only cache non-COW-only (subvolume trees) tree blocks */
		if (!upper->cowonly) {
			rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
						   &upper->rb_node);
			if (rb_node) {
				btrfs_backref_panic(cache->fs_info,
						upper->bytenr, -EEXIST);
				return -EUCLEAN;
			}
		}

		list_add_tail(&edge->list[UPPER], &upper->lower);

		/*
		 * Also queue all the parent edges of this uncached node
		 * to finish the upper linkage
		 */
		list_for_each_entry(edge, &upper->upper, list[LOWER])
			list_add_tail(&edge->list[UPPER], &pending_edge);
	}
	return 0;
}

void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
				 struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *lower;
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;

	while (!list_empty(&cache->useless_node)) {
		lower = list_first_entry(&cache->useless_node,
				   struct btrfs_backref_node, list);
		list_del_init(&lower->list);
	}
	while (!list_empty(&cache->pending_edge)) {
		edge = list_first_entry(&cache->pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		list_del(&edge->list[UPPER]);
		list_del(&edge->list[LOWER]);
		lower = edge->node[LOWER];
		upper = edge->node[UPPER];
		btrfs_backref_free_edge(cache, edge);

		/*
		 * Lower is no longer linked to any upper backref nodes and
		 * isn't in the cache, we can free it ourselves.
		 */
		if (list_empty(&lower->upper) &&
		    RB_EMPTY_NODE(&lower->rb_node))
			list_add(&lower->list, &cache->useless_node);

		if (!RB_EMPTY_NODE(&upper->rb_node))
			continue;

		/* Add this guy's upper edges to the list to process */
		list_for_each_entry(edge, &upper->upper, list[LOWER])
			list_add_tail(&edge->list[UPPER],
				      &cache->pending_edge);
		if (list_empty(&upper->upper))
			list_add(&upper->list, &cache->useless_node);
	}

	while (!list_empty(&cache->useless_node)) {
		lower = list_first_entry(&cache->useless_node,
				   struct btrfs_backref_node, list);
		list_del_init(&lower->list);
		if (lower == node)
			node = NULL;
		btrfs_backref_free_node(cache, lower);
	}

	btrfs_backref_cleanup_node(cache, node);
	ASSERT(list_empty(&cache->useless_node) &&
	       list_empty(&cache->pending_edge));
}
+297 −0

File changed.

Preview size limit exceeded, changes collapsed.

Loading