Commit 55ba0fe0 authored by Linus Torvalds
Pull btrfs updates from David Sterba:
 "The updates this time are mostly stabilization, preparation and minor
  improvements.

  User visible improvements:

   - readahead for send, improving the run time of a full send by 10%
     and of an incremental send by 25%

   - make reflinks respect O_SYNC, O_DSYNC and S_SYNC flags (a usage
     sketch follows this list)

   - export supported sectorsize values in sysfs (currently only page
     size, more once full subpage support lands); see the sysfs sketch
     after this list

   - more graceful errors and warnings on 32bit systems when logical
     addresses for metadata reach the limit imposed by the unsigned
     long type of page::index
      - error: fail mount if there's a metadata block beyond the limit
      - error: new metadata block would be at unreachable address
      - warn when 5/8th of the limit is reached; for 4K page systems
        it's 10T, for 64K pages it's 160T (the arithmetic is sketched
        after this list)

   - zoned mode
      - relocated zones get reset at the end instead of being discarded
      - automatic background reclaim of zones that have 75%+ of unusable
        space, the threshold is tunable in sysfs (see the sysfs sketch
        after this list)
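
   A minimal userspace sketch of the reflink change (paths here are
   illustrative, not from the patches): with this update a clone into a
   descriptor opened with O_DSYNC is persisted before the ioctl
   returns, as a regular write to that descriptor would be.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FICLONE */

	int main(void)
	{
		int src = open("/mnt/btrfs/src", O_RDONLY);
		int dst = open("/mnt/btrfs/dst",
			       O_WRONLY | O_CREAT | O_DSYNC, 0644);

		if (src < 0 || dst < 0)
			return 1;
		/* share src's extents with dst (reflink) */
		if (ioctl(dst, FICLONE, src) < 0) {
			perror("FICLONE");
			return 1;
		}
		return 0;
	}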
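
   For reference, the 10T/160T warning thresholds follow directly from
   page::index being an unsigned long, which allows at most 2^32 page
   indexes on a 32bit system; a self-contained check of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long pages = 1ULL << 32;	 /* 2^32 page indexes */
		unsigned long long lim4k = pages << 12;	 /* 4K pages */
		unsigned long long lim64k = pages << 16; /* 64K pages */

		printf("4K:  limit %lluT, warn at %lluT\n",
		       lim4k >> 40, lim4k / 8 * 5 >> 40);   /* 16T, 10T */
		printf("64K: limit %lluT, warn at %lluT\n",
		       lim64k >> 40, lim64k / 8 * 5 >> 40); /* 256T, 160T */
		return 0;
	}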
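
   The two sysfs attributes mentioned above read like any other sysfs
   file.  A sketch, assuming the attribute names from this series
   (supported_sectorsizes under the global features directory and a
   per-filesystem bg_reclaim_threshold; <FSID> stands for the
   filesystem UUID):

	#include <stdio.h>

	static void show(const char *path)
	{
		char buf[64];
		FILE *f = fopen(path, "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("%s: %s", path, buf);
		if (f)
			fclose(f);
	}

	int main(void)
	{
		show("/sys/fs/btrfs/features/supported_sectorsizes");
		/* zone reclaim threshold in percent; writable to tune it */
		show("/sys/fs/btrfs/<FSID>/bg_reclaim_threshold");
		return 0;
	}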

  Fixes:

   - fsync and tree mod log fixes

   - fix inefficient preemptive reclaim calculations

   - fix exhaustion of the system chunk array due to concurrent
     allocations

   - fix fallback to no compression when racing with remount

   - preemptive fix for dm-crypt on a zoned device that does not
     properly advertise zoned support

  Core changes:

   - add inode lock to synchronize mmap and other block updates (e.g.
     deduplication, fallocate, fsync); a userspace analogy of the
     locking scheme is sketched below

   - kmap conversions to the new kmap_local API (a representative
     before/after is sketched below)

   - subpage support (continued)
      - new helpers for page state/extent buffer tracking
      - metadata changes now support read and write

   - error handling throughout relocation call paths

   - many other cleanups and code simplifications"
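
The i_mmap_lock added in this series can be modeled in userspace with a
rwlock (an analogy only, not kernel code; the thread names are
illustrative): page faults take the lock shared, while operations that
rewrite a file range out of band, such as deduplication or fallocate,
take it exclusive and therefore never observe a half-updated range.

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t i_mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

	static void *fault_path(void *arg)
	{
		(void)arg;
		pthread_rwlock_rdlock(&i_mmap_lock);	/* faults may run concurrently */
		puts("fault: page dirtied");
		pthread_rwlock_unlock(&i_mmap_lock);
		return NULL;
	}

	static void *dedupe_path(void *arg)
	{
		(void)arg;
		pthread_rwlock_wrlock(&i_mmap_lock);	/* excludes all faults */
		puts("dedupe: extents swapped");
		pthread_rwlock_unlock(&i_mmap_lock);
		return NULL;
	}

	int main(void)	/* build with -lpthread */
	{
		pthread_t t1, t2;

		pthread_create(&t1, NULL, fault_path, NULL);
		pthread_create(&t2, NULL, dedupe_path, NULL);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		return 0;
	}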
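
The kmap conversions follow this pattern (a representative sketch in
diff form, not one of the hunks from this pull; kmap_local_page()
returns a mapping private to the current context, and kunmap_local()
takes the returned address rather than the page):

	-	kaddr = kmap(page);
	+	kaddr = kmap_local_page(page);
	 	memcpy(buf, kaddr + offset, len);
	-	kunmap(page);
	+	kunmap_local(kaddr);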

* tag 'for-5.13-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (112 commits)
  btrfs: zoned: automatically reclaim zones
  btrfs: rename delete_unused_bgs_mutex to reclaim_bgs_lock
  btrfs: zoned: reset zones of relocated block groups
  btrfs: more graceful errors/warnings on 32bit systems when reaching limits
  btrfs: zoned: fix unpaired block group unfreeze during device replace
  btrfs: fix race when picking most recent mod log operation for an old root
  btrfs: fix metadata extent leak after failure to create subvolume
  btrfs: handle remount to no compress during compression
  btrfs: zoned: fail mount if the device does not support zone append
  btrfs: fix race between transaction aborts and fsyncs leading to use-after-free
  btrfs: introduce submit_eb_subpage() to submit a subpage metadata page
  btrfs: make lock_extent_buffer_for_io() to be subpage compatible
  btrfs: introduce write_one_subpage_eb() function
  btrfs: introduce end_bio_subpage_eb_writepage() function
  btrfs: check return value of btrfs_commit_transaction in relocation
  btrfs: do proper error handling in merge_reloc_roots
  btrfs: handle extent corruption with select_one_root properly
  btrfs: cleanup error handling in prepare_to_merge
  btrfs: do not panic in __add_reloc_root
  btrfs: handle __add_reloc_root failures in btrfs_recover_relocation
  ...
parents 2a19866b 18bb8bbf
fs/btrfs/Makefile (+1 −1)
@@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o
+	   subpage.o tree-mod-log.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
fs/btrfs/backref.c (+17 −16)
@@ -14,6 +14,7 @@
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"
#include "tree-mod-log.h"

/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@@ -452,7 +453,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
	if (path->slots[0] >= btrfs_header_nritems(eb) ||
	    is_shared_data_backref(preftrees, eb->start) ||
	    ref->root_id != btrfs_header_owner(eb)) {
-		if (time_seq == SEQ_LAST)
+		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_leaf(root, path);
		else
			ret = btrfs_next_old_leaf(root, path, time_seq);
@@ -476,7 +477,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
		if (slot == 0 &&
		    (is_shared_data_backref(preftrees, eb->start) ||
		     ref->root_id != btrfs_header_owner(eb))) {
-			if (time_seq == SEQ_LAST)
+			if (time_seq == BTRFS_SEQ_LAST)
				ret = btrfs_next_leaf(root, path);
			else
				ret = btrfs_next_old_leaf(root, path, time_seq);
@@ -514,7 +515,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			eie = NULL;
		}
next:
-		if (time_seq == SEQ_LAST)
+		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_item(root, path);
		else
			ret = btrfs_next_old_item(root, path, time_seq);
@@ -574,7 +575,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,

	if (path->search_commit_root)
		root_level = btrfs_header_level(root->commit_root);
-	else if (time_seq == SEQ_LAST)
+	else if (time_seq == BTRFS_SEQ_LAST)
		root_level = btrfs_header_level(root->node);
	else
		root_level = btrfs_old_root_level(root, time_seq);
@@ -605,7 +606,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
	    search_key.offset >= LLONG_MAX)
		search_key.offset = 0;
	path->lowest_level = level;
-	if (time_seq == SEQ_LAST)
+	if (time_seq == BTRFS_SEQ_LAST)
		ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	else
		ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
@@ -1147,8 +1148,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
 * indirect refs to their parent bytenr.
 * When roots are found, they're added to the roots list
 *
- * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
- * much like trans == NULL case, the difference only lies in it will not
+ * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs, and
+ * behave much like trans == NULL case, the difference only lies in it will not
 * commit root.
 * The special case is for qgroup to search roots in commit_transaction().
 *
@@ -1199,7 +1200,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
		path->skip_locking = 1;
	}

-	if (time_seq == SEQ_LAST)
+	if (time_seq == BTRFS_SEQ_LAST)
		path->skip_locking = 1;

	/*
@@ -1217,9 +1218,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (trans && likely(trans->type != __TRANS_DUMMY) &&
-	    time_seq != SEQ_LAST) {
+	    time_seq != BTRFS_SEQ_LAST) {
#else
-	if (trans && time_seq != SEQ_LAST) {
+	if (trans && time_seq != BTRFS_SEQ_LAST) {
#endif
		/*
		 * look if there are updates for this ref queued and lock the
@@ -1527,7 +1528,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
	struct btrfs_trans_handle *trans;
	struct ulist_iterator uiter;
	struct ulist_node *node;
-	struct seq_list elem = SEQ_LIST_INIT(elem);
+	struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
	int ret = 0;
	struct share_check shared = {
		.root_objectid = root->root_key.objectid,
@@ -1953,7 +1954,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
	struct ulist *roots = NULL;
	struct ulist_node *ref_node = NULL;
	struct ulist_node *root_node = NULL;
-	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
+	struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem);
	struct ulist_iterator ref_uiter;
	struct ulist_iterator root_uiter;

@@ -1971,12 +1972,12 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
	}

	if (trans)
-		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+		btrfs_get_tree_mod_seq(fs_info, &seq_elem);
	else
		down_read(&fs_info->commit_root_sem);

	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
-				   tree_mod_seq_elem.seq, &refs,
+				   seq_elem.seq, &refs,
				   &extent_item_pos, ignore_offset);
	if (ret)
		goto out;
@@ -1984,7 +1985,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
	ULIST_ITER_INIT(&ref_uiter);
	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
		ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
-						tree_mod_seq_elem.seq, &roots,
+						seq_elem.seq, &roots,
						ignore_offset);
		if (ret)
			break;
@@ -2007,7 +2008,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
	free_leaf_list(refs);
out:
	if (trans) {
-		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+		btrfs_put_tree_mod_seq(fs_info, &seq_elem);
		btrfs_end_transaction(trans);
	} else {
		up_read(&fs_info->commit_root_sem);
fs/btrfs/block-group.c (+184 −23)
@@ -1289,7 +1289,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
	 * Long running balances can keep us blocked here for eternity, so
	 * simply skip deletion if we're unable to get the mutex.
	 */
-	if (!mutex_trylock(&fs_info->delete_unused_bgs_mutex))
+	if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
		return;

	spin_lock(&fs_info->unused_bgs_lock);
@@ -1462,12 +1462,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
-	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+	mutex_unlock(&fs_info->reclaim_bgs_lock);
	return;

flip_async:
	btrfs_end_transaction(trans);
-	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+	mutex_unlock(&fs_info->reclaim_bgs_lock);
	btrfs_put_block_group(block_group);
	btrfs_discard_punt_unused_bgs_list(fs_info);
}
@@ -1485,6 +1485,97 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
	spin_unlock(&fs_info->unused_bgs_lock);
}

void btrfs_reclaim_bgs_work(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info =
		container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
	struct btrfs_block_group *bg;
	struct btrfs_space_info *space_info;
	int ret;

	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
		return;

	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
		return;

	mutex_lock(&fs_info->reclaim_bgs_lock);
	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->reclaim_bgs)) {
		bg = list_first_entry(&fs_info->reclaim_bgs,
				      struct btrfs_block_group,
				      bg_list);
		list_del_init(&bg->bg_list);

		space_info = bg->space_info;
		spin_unlock(&fs_info->unused_bgs_lock);

		/* Don't race with allocators so take the groups_sem */
		down_write(&space_info->groups_sem);

		spin_lock(&bg->lock);
		if (bg->reserved || bg->pinned || bg->ro) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
			 * the ro check in case balance is currently acting on
			 * this block group.
			 */
			spin_unlock(&bg->lock);
			up_write(&space_info->groups_sem);
			goto next;
		}
		spin_unlock(&bg->lock);

		/* Get out fast, in case we're unmounting the filesystem */
		if (btrfs_fs_closing(fs_info)) {
			up_write(&space_info->groups_sem);
			goto next;
		}

		ret = inc_block_group_ro(bg, 0);
		up_write(&space_info->groups_sem);
		if (ret < 0)
			goto next;

		btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used",
				bg->start, div_u64(bg->used * 100, bg->length));
		trace_btrfs_reclaim_block_group(bg);
		ret = btrfs_relocate_chunk(fs_info, bg->start);
		if (ret)
			btrfs_err(fs_info, "error relocating chunk %llu",
				  bg->start);

next:
		btrfs_put_block_group(bg);
		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
	mutex_unlock(&fs_info->reclaim_bgs_lock);
	btrfs_exclop_finish(fs_info);
}

void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->unused_bgs_lock);
	if (!list_empty(&fs_info->reclaim_bgs))
		queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
	spin_unlock(&fs_info->unused_bgs_lock);
}

void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
{
	struct btrfs_fs_info *fs_info = bg->fs_info;

	spin_lock(&fs_info->unused_bgs_lock);
	if (list_empty(&bg->bg_list)) {
		btrfs_get_block_group(bg);
		trace_btrfs_add_reclaim_block_group(bg);
		list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
			   struct btrfs_path *path)
{
@@ -2267,16 +2358,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
	struct btrfs_trans_handle *trans;
	u64 alloc_flags;
	int ret;
	bool dirty_bg_running;

-again:
+	do {
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		dirty_bg_running = false;

		/*
-	 * we're not allowed to set block groups readonly after the dirty
-	 * block groups cache has started writing.  If it already started,
-	 * back off and let this transaction commit
+		 * We're not allowed to set block groups readonly after the dirty
+		 * block group cache has started writing.  If it already started,
+		 * back off and let this transaction commit.
		 */
		mutex_lock(&fs_info->ro_block_group_mutex);
		if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
@@ -2288,8 +2382,9 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
			ret = btrfs_wait_for_commit(fs_info, transid);
			if (ret)
				return ret;
-		goto again;
+			dirty_bg_running = true;
		}
	} while (dirty_bg_running);

	if (do_chunk_alloc) {
		/*
@@ -3269,6 +3364,7 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
 */
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_space_info *info;
	u64 left;
@@ -3283,6 +3379,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
	lockdep_assert_held(&fs_info->chunk_mutex);

	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
again:
	spin_lock(&info->lock);
	left = info->total_bytes - btrfs_space_info_used(info, true);
	spin_unlock(&info->lock);
@@ -3301,6 +3398,58 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)

	if (left < thresh) {
		u64 flags = btrfs_system_alloc_profile(fs_info);
		u64 reserved = atomic64_read(&cur_trans->chunk_bytes_reserved);

		/*
		 * If there's not available space for the chunk tree (system
		 * space) and there are other tasks that reserved space for
		 * creating a new system block group, wait for them to complete
		 * the creation of their system block group and release excess
		 * reserved space. We do this because:
		 *
		 * *) We can end up allocating more system chunks than necessary
		 *    when there are multiple tasks that are concurrently
		 *    allocating block groups, which can lead to exhaustion of
		 *    the system array in the superblock;
		 *
		 * *) If we allocate extra and unnecessary system block groups,
		 *    despite being empty for a long time, and possibly forever,
		 *    they end up not being added to the list of unused block groups
		 *    because that typically happens only when deallocating the
		 *    last extent from a block group - which never happens since
		 *    we never allocate from them in the first place. The few
		 *    exceptions are when mounting a filesystem or running scrub,
		 *    which add unused block groups to the list of unused block
		 *    groups, to be deleted by the cleaner kthread.
		 *    And even when they are added to the list of unused block
		 *    groups, it can take a long time until they get deleted,
		 *    since the cleaner kthread might be sleeping or busy with
		 *    other work (deleting subvolumes, running delayed iputs,
		 *    defrag scheduling, etc);
		 *
		 * This is rare in practice, but can happen when too many tasks
		 * are allocating block groups in parallel (via fallocate())
		 * and before the one that reserved space for a new system block
		 * group finishes the block group creation and releases the space
		 * reserved in excess (at btrfs_create_pending_block_groups()),
		 * other tasks end up here and see free system space temporarily
		 * not enough for updating the chunk tree.
		 *
		 * We unlock the chunk mutex before waiting for such tasks and
		 * lock it again after the wait, otherwise we would deadlock.
		 * It is safe to do so because allocating a system chunk is the
		 * first thing done while allocating a new block group.
		 */
		if (reserved > trans->chunk_bytes_reserved) {
			const u64 min_needed = reserved - thresh;

			mutex_unlock(&fs_info->chunk_mutex);
			wait_event(cur_trans->chunk_reserve_wait,
			   atomic64_read(&cur_trans->chunk_bytes_reserved) <=
			   min_needed);
			mutex_lock(&fs_info->chunk_mutex);
			goto again;
		}

		/*
		 * Ignore failure to create system chunk. We might end up not
@@ -3315,10 +3464,12 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
		ret = btrfs_block_rsv_add(fs_info->chunk_root,
					  &fs_info->chunk_block_rsv,
					  thresh, BTRFS_RESERVE_NO_FLUSH);
-		if (!ret)
+		if (!ret) {
			atomic64_add(thresh, &cur_trans->chunk_bytes_reserved);
			trans->chunk_bytes_reserved += thresh;
		}
	}
}

void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
@@ -3386,6 +3537,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
	}
	spin_unlock(&info->unused_bgs_lock);

	spin_lock(&info->unused_bgs_lock);
	while (!list_empty(&info->reclaim_bgs)) {
		block_group = list_first_entry(&info->reclaim_bgs,
					       struct btrfs_block_group,
					       bg_list);
		list_del_init(&block_group->bg_list);
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&info->unused_bgs_lock);

	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group,
fs/btrfs/block-group.h (+3 −0)
@@ -264,6 +264,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
void btrfs_reclaim_bgs_work(struct work_struct *work);
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
			   u64 type, u64 chunk_offset, u64 size);
fs/btrfs/btrfs_inode.h (+20 −13)
@@ -220,6 +220,7 @@ struct btrfs_inode {
	/* Hook into fs_info->delayed_iputs */
	struct list_head delayed_iput;

	struct rw_semaphore i_mmap_lock;
	struct inode vfs_inode;
};

@@ -299,24 +300,30 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
						  mod);
}

-static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
+/*
+ * Called every time after doing a buffered, direct IO or memory mapped write.
+ *
+ * This is to ensure that if we write to a file that was previously fsynced in
+ * the current transaction, then try to fsync it again in the same transaction,
+ * we will know that there were changes in the file and that it needs to be
+ * logged.
+ */
+static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
+{
+	spin_lock(&inode->lock);
+	inode->last_sub_trans = inode->root->log_transid;
+	spin_unlock(&inode->lock);
+}
+
+static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
-	int ret = 0;
+	bool ret = false;

	spin_lock(&inode->lock);
	if (inode->logged_trans == generation &&
	    inode->last_sub_trans <= inode->last_log_commit &&
-	    inode->last_sub_trans <= inode->root->last_log_commit) {
-		/*
-		 * After a ranged fsync we might have left some extent maps
-		 * (that fall outside the fsync's range). So return false
-		 * here if the list isn't empty, to make sure btrfs_log_inode()
-		 * will be called and process those extent maps.
-		 */
-		smp_mb();
-		if (list_empty(&inode->extent_tree.modified_extents))
-			ret = 1;
-	}
+	    inode->last_sub_trans <= inode->root->last_log_commit)
+		ret = true;
	spin_unlock(&inode->lock);
	return ret;
}