Commit cc423f63 authored by Linus Torvalds
Pull btrfs updates from David Sterba:
 "Mainly core changes, refactoring and optimizations.

  Performance is improved in some areas; overall there may be a
  cumulative improvement due to refactoring that removed lookups in the
  IO path or simplified IO submission tracking.

  Core:

   - submit IO synchronously for fast checksums (crc32c and xxhash),
     remove high priority worker kthread

   - read extent buffer in one go, simplify IO tracking, bio submission
     and locking

   - remove additional tracking of redirtied extent buffers, originally
     added for zoned mode but actually not needed

   - track ordered extent pointer in bio to avoid rbtree lookups during
     IO

   - in scrub, use recovered data stripes as cache to avoid unnecessary
     reads

   - in zoned mode, optimize logical to physical mappings of extents

   - remove PageError handling, not set by VFS nor writeback

   - cleanups, refactoring, better structure packing

   - lots of error handling improvements

   - more assertions, lockdep annotations

   - print assertion failure with the exact line where it happens

   - tracepoint updates

   - more debugging prints

  Performance:

   - speedup in fsync(), better tracking of inode logged status can
     avoid transaction commit

   - IO path structures track logical offsets in data structures and
     do not need to look them up

  User visible changes:

   - don't commit transaction for every created subvolume, this can
     reduce time when many subvolumes are created in a batch

   - print affected files when relocation fails

   - trigger orphan file cleanup during START_SYNC ioctl

  Notable fixes:

   - fix crash when disabling quota and relocation

   - fix crashes when removing roots from dirty list

   - fix transaction abort during relocation when converting from newer
     profiles not covered by fallback

   - in zoned mode, stop reclaiming block groups if filesystem becomes
     read-only

   - fix rare race condition in tree mod log rewind that can miss some
     btree node slots

   - with enabled fsverity, drop up-to-date page bit in case the
     verification fails"

* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
  btrfs: fix race between quota disable and relocation
  btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
  btrfs: fix race when deleting free space root from the dirty cow roots list
  btrfs: fix race when deleting quota root from the dirty cow roots list
  btrfs: tracepoints: also show actual number of the outstanding extents
  btrfs: update i_version in update_dev_time
  btrfs: make btrfs_compressed_bioset static
  btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
  btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
  btrfs: scrub: remove scrub_ctx::csum_list member
  btrfs: do not BUG_ON after failure to migrate space during truncation
  btrfs: do not BUG_ON on failure to get dir index for new snapshot
  btrfs: send: do not BUG_ON() on unexpected symlink data extent
  btrfs: do not BUG_ON() when dropping inode items from log root
  btrfs: replace BUG_ON() at split_item() with proper error handling
  btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
  btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
  btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
  btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
  btrfs: abort transaction at update_ref_for_cow() when ref count is zero
  ...
parents e940efa9 8a4a0b2a
fs/btrfs/async-thread.c  +39 −5
@@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
 	return atomic_read(&wq->pending) > wq->thresh * 2;
 }
 
+static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
+				 struct btrfs_fs_info *fs_info)
+{
+	wq->fs_info = fs_info;
+	atomic_set(&wq->pending, 0);
+	INIT_LIST_HEAD(&wq->ordered_list);
+	spin_lock_init(&wq->list_lock);
+	spin_lock_init(&wq->thres_lock);
+}
+
 struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 					      const char *name, unsigned int flags,
 					      int limit_active, int thresh)
@@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 	if (!ret)
 		return NULL;
 
-	ret->fs_info = fs_info;
+	btrfs_init_workqueue(ret, fs_info);
+
 	ret->limit_active = limit_active;
-	atomic_set(&ret->pending, 0);
 	if (thresh == 0)
 		thresh = DFT_THRESHOLD;
 	/* For low threshold, disabling threshold is a better choice */
@@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 		return NULL;
 	}
 
-	INIT_LIST_HEAD(&ret->ordered_list);
-	spin_lock_init(&ret->list_lock);
-	spin_lock_init(&ret->thres_lock);
 	trace_btrfs_workqueue_alloc(ret, name);
 	return ret;
 }
 
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+				struct btrfs_fs_info *fs_info, const char *name,
+				unsigned int flags)
+{
+	struct btrfs_workqueue *ret;
+
+	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return NULL;
+
+	btrfs_init_workqueue(ret, fs_info);
+
+	/* Ordered workqueues don't allow @max_active adjustments. */
+	ret->limit_active = 1;
+	ret->current_active = 1;
+	ret->thresh = NO_THRESHOLD;
+
+	ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
+	if (!ret->normal_wq) {
+		kfree(ret);
+		return NULL;
+	}
+
+	trace_btrfs_workqueue_alloc(ret, name);
+	return ret;
+}
fs/btrfs/async-thread.h  +3 −0
@@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
 					      unsigned int flags,
 					      int limit_active,
 					      int thresh);
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+				struct btrfs_fs_info *fs_info, const char *name,
+				unsigned int flags);
 void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
 		     btrfs_func_t ordered_func, btrfs_func_t ordered_free);
 void btrfs_queue_work(struct btrfs_workqueue *wq,
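
As a usage illustration (a hedged sketch, not code from this series): a caller that previously passed limit_active=1 to btrfs_alloc_workqueue() could switch to the new helper. The "fixup" queue name and the field it is stored in are assumptions for illustration only.

	/* Hypothetical caller; "fixup" and the target field are illustrative. */
	fs_info->fixup_workers = btrfs_alloc_ordered_workqueue(fs_info, "fixup", 0);
	if (!fs_info->fixup_workers)
		return -ENOMEM;

Ordered queues execute one item at a time, which is why the helper pins limit_active/current_active to 1 and disables thresholding.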
fs/btrfs/bio.c  +68 −54
@@ -27,6 +27,17 @@ struct btrfs_failed_bio {
 	atomic_t repair_count;
 };
 
+/* Is this a data path I/O that needs storage layer checksum and repair? */
+static inline bool is_data_bbio(struct btrfs_bio *bbio)
+{
+	return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
+}
+
+static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
+{
+	return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
+}
+
 /*
  * Initialize a btrfs_bio structure.  This skips the embedded bio itself as it
  * is already initialized by the block layer.
@@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
 	return bbio;
 }
 
-static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
-{
-	struct btrfs_ordered_extent *ordered;
-	int ret;
-
-	ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
-	if (WARN_ON_ONCE(!ordered))
-		return BLK_STS_IOERR;
-	ret = btrfs_extract_ordered_extent(bbio, ordered);
-	btrfs_put_ordered_extent(ordered);
-
-	return errno_to_blk_status(ret);
-}
-
 static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
 					 struct btrfs_bio *orig_bbio,
 					 u64 map_length, bool use_append)
@@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
 	btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
 	bbio->inode = orig_bbio->inode;
 	bbio->file_offset = orig_bbio->file_offset;
-	if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
-		orig_bbio->file_offset += map_length;
-
+	orig_bbio->file_offset += map_length;
+	if (bbio_has_ordered_extent(bbio)) {
+		refcount_inc(&orig_bbio->ordered->refs);
+		bbio->ordered = orig_bbio->ordered;
+	}
 	atomic_inc(&orig_bbio->pending_ios);
 	return bbio;
 }
 
+/* Free a bio that was never submitted to the underlying device. */
+static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
+{
+	if (bbio_has_ordered_extent(bbio))
+		btrfs_put_ordered_extent(bbio->ordered);
+	bio_put(&bbio->bio);
+}
+
+static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
+{
+	if (bbio_has_ordered_extent(bbio)) {
+		struct btrfs_ordered_extent *ordered = bbio->ordered;
+
+		bbio->end_io(bbio);
+		btrfs_put_ordered_extent(ordered);
+	} else {
+		bbio->end_io(bbio);
+	}
+}
+
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+{
+	bbio->bio.bi_status = status;
+	__btrfs_bio_end_io(bbio);
+}
+
 static void btrfs_orig_write_end_io(struct bio *bio);
 
 static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
@@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
 
 		if (bbio->bio.bi_status)
 			btrfs_bbio_propagate_error(bbio, orig_bbio);
-		bio_put(&bbio->bio);
+		btrfs_cleanup_bio(bbio);
 		bbio = orig_bbio;
 	}
 
 	if (atomic_dec_and_test(&bbio->pending_ios))
-		bbio->end_io(bbio);
+		__btrfs_bio_end_io(bbio);
 }
 
 static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
@@ -327,7 +352,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
 	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
 
 	/* Metadata reads are checked and repaired by the submitter. */
-	if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
+	if (is_data_bbio(bbio))
 		btrfs_check_read_bio(bbio, bbio->bio.bi_private);
 	else
 		btrfs_orig_bbio_end_io(bbio);
@@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
 		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
 		queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
 	} else {
-		if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+		if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
 			btrfs_record_physical_zoned(bbio);
 		btrfs_orig_bbio_end_io(bbio);
 	}
@@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 	bbio->mirror_num = bioc->mirror_num;
-	if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
-	    !(bbio->bio.bi_opf & REQ_META))
+	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
 		btrfs_check_read_bio(bbio, NULL);
 	else
 		btrfs_orig_bbio_end_io(bbio);
@@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
 static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
 			       struct btrfs_io_stripe *smap, int mirror_num)
 {
-	/* Do not leak our private flag into the block layer. */
-	bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
-
 	if (!bioc) {
 		/* Single mirror read/write fast path. */
 		btrfs_bio(bio)->mirror_num = mirror_num;
 		bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+		if (bio_op(bio) != REQ_OP_READ)
+			btrfs_bio(bio)->orig_physical = smap->physical;
 		bio->bi_private = smap->dev;
 		bio->bi_end_io = btrfs_simple_end_io;
 		btrfs_submit_dev_bio(smap->dev, bio);
@@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)
 
 static bool should_async_write(struct btrfs_bio *bbio)
 {
-	/*
-	 * If the I/O is not issued by fsync and friends, (->sync_writers != 0),
-	 * then try to defer the submission to a workqueue to parallelize the
-	 * checksum calculation.
-	 */
-	if (atomic_read(&bbio->inode->sync_writers))
+	/* Submit synchronously if the checksum implementation is fast. */
+	if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
 		return false;
 
 	/*
-	 * Submit metadata writes synchronously if the checksum implementation
-	 * is fast, or we are on a zoned device that wants I/O to be submitted
-	 * in order.
+	 * Try to defer the submission to a workqueue to parallelize the
+	 * checksum calculation unless the I/O is issued synchronously.
 	 */
-	if (bbio->bio.bi_opf & REQ_META) {
-		struct btrfs_fs_info *fs_info = bbio->fs_info;
-
-		if (btrfs_is_zoned(fs_info))
-			return false;
-		if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
-			return false;
-	}
+	if (op_is_sync(bbio->bio.bi_opf))
+		return false;
+
+	/* Zoned devices require I/O to be submitted in order. */
+	if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+		return false;
 
 	return true;
 }
@@ -622,9 +638,6 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
 
 	btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
 			run_one_async_free);
-	if (op_is_sync(bbio->bio.bi_opf))
-		btrfs_queue_work(fs_info->hipri_workers, &async->work);
-	else
-		btrfs_queue_work(fs_info->workers, &async->work);
+	btrfs_queue_work(fs_info->workers, &async->work);
 	return true;
 }
@@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 	struct btrfs_fs_info *fs_info = bbio->fs_info;
 	struct btrfs_bio *orig_bbio = bbio;
 	struct bio *bio = &bbio->bio;
-	u64 logical = bio->bi_iter.bi_sector << 9;
+	u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
 	u64 length = bio->bi_iter.bi_size;
 	u64 map_length = length;
 	bool use_append = btrfs_use_zone_append(bbio);
@@ -645,7 +658,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 	int error;
 
 	btrfs_bio_counter_inc_blocked(fs_info);
-	error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+	error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
 				&bioc, &smap, &mirror_num, 1);
 	if (error) {
 		ret = errno_to_blk_status(error);
@@ -665,7 +678,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 	 * Save the iter for the end_io handler and preload the checksums for
 	 * data reads.
 	 */
-	if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
+	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
 		bbio->saved_iter = bio->bi_iter;
 		ret = btrfs_lookup_bio_sums(bbio);
 		if (ret)
@@ -676,9 +689,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 		if (use_append) {
 			bio->bi_opf &= ~REQ_OP_WRITE;
 			bio->bi_opf |= REQ_OP_ZONE_APPEND;
-			ret = btrfs_bio_extract_ordered_extent(bbio);
-			if (ret)
-				goto fail_put_bio;
 		}
 
 		/*
@@ -695,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 			ret = btrfs_bio_csum(bbio);
 			if (ret)
 				goto fail_put_bio;
+		} else if (use_append) {
+			ret = btrfs_alloc_dummy_sum(bbio);
+			if (ret)
+				goto fail_put_bio;
 		}
 	}
 
@@ -704,7 +718,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 
 fail_put_bio:
 	if (map_length < length)
-		bio_put(bio);
+		btrfs_cleanup_bio(bbio);
 fail:
 	btrfs_bio_counter_dec(fs_info);
 	btrfs_bio_end_io(orig_bbio, ret);
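
The bio.c changes above keep one ownership rule: every split bbio that can reach the ordered extent holds its own reference, released either by btrfs_cleanup_bio() when the bio was never submitted, or after ->end_io() runs (the local `ordered` in __btrfs_bio_end_io() is needed because the end_io callback may free the bbio). A standalone sketch of that clone-takes-a-reference pattern, with generic names that are not btrfs APIs:

	#include <stdatomic.h>
	#include <stdlib.h>

	/* Stand-in for a refcounted btrfs_ordered_extent. */
	struct ordered {
		atomic_int refs;
	};

	/* A clone takes its own reference, as btrfs_split_bio() does. */
	static void clone_takes_ref(struct ordered *o)
	{
		atomic_fetch_add_explicit(&o->refs, 1, memory_order_relaxed);
	}

	/* Dropping the last reference frees the object. */
	static void ordered_put(struct ordered *o)
	{
		if (atomic_fetch_sub_explicit(&o->refs, 1, memory_order_acq_rel) == 1)
			free(o);
	}

	int main(void)
	{
		struct ordered *o = malloc(sizeof(*o));

		atomic_init(&o->refs, 1);	/* submitter's reference */
		clone_takes_ref(o);		/* split clone */
		ordered_put(o);			/* clone completes */
		ordered_put(o);			/* submitter completes, frees */
		return 0;
	}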
fs/btrfs/bio.h  +17 −12
@@ -39,8 +39,8 @@ struct btrfs_bio {
 
 	union {
 		/*
-		 * Data checksumming and original I/O information for internal
-		 * use in the btrfs_submit_bio machinery.
+		 * For data reads: checksumming and original I/O information.
+		 * (for internal use in the btrfs_submit_bio machinery only)
 		 */
 		struct {
 			u8 *csum;
@@ -48,7 +48,20 @@ struct btrfs_bio {
 			struct bvec_iter saved_iter;
 		};
 
-		/* For metadata parentness verification. */
+		/*
+		 * For data writes:
+		 * - ordered extent covering the bio
+		 * - pointer to the checksums for this bio
+		 * - original physical address from the allocator
+		 *   (for zone append only)
+		 */
+		struct {
+			struct btrfs_ordered_extent *ordered;
+			struct btrfs_ordered_sum *sums;
+			u64 orig_physical;
+		};
+
+		/* For metadata reads: parentness verification. */
 		struct btrfs_tree_parent_check parent_check;
 	};
 
@@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
 struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
 				  struct btrfs_fs_info *fs_info,
 				  btrfs_bio_end_io_t end_io, void *private);
-
-static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
-{
-	bbio->bio.bi_status = status;
-	bbio->end_io(bbio);
-}
-
-/* Bio only refers to one ordered extent. */
-#define REQ_BTRFS_ONE_ORDERED			REQ_DRV
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
 
 /* Submit using blkcg_punt_bio_submit. */
 #define REQ_BTRFS_CGROUP_PUNT			REQ_FS_PRIVATE
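
A minimal allocation/completion sketch under the reworked interface (demo_end_io and the failure status are assumptions; only btrfs_bio_alloc() and btrfs_bio_end_io() come from the header above):

	/* Hypothetical end_io callback; real callers release the bio here. */
	static void demo_end_io(struct btrfs_bio *bbio)
	{
		bio_put(&bbio->bio);
	}

	/* ...then, in a submission path with fs_info in scope: */
	struct btrfs_bio *bbio = btrfs_bio_alloc(1, REQ_OP_WRITE, fs_info,
						 demo_end_io, NULL);

	/* Completing through the out-of-line helper also drops bbio->ordered
	 * for data writes, which the old inline version could not do. */
	btrfs_bio_end_io(bbio, BLK_STS_IOERR);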
fs/btrfs/block-group.c  +40 −7
@@ -95,14 +95,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
 	}
 	allowed &= flags;
 
-	if (allowed & BTRFS_BLOCK_GROUP_RAID6)
+	/* Select the highest-redundancy RAID level. */
+	if (allowed & BTRFS_BLOCK_GROUP_RAID1C4)
+		allowed = BTRFS_BLOCK_GROUP_RAID1C4;
+	else if (allowed & BTRFS_BLOCK_GROUP_RAID6)
 		allowed = BTRFS_BLOCK_GROUP_RAID6;
+	else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3)
+		allowed = BTRFS_BLOCK_GROUP_RAID1C3;
 	else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
 		allowed = BTRFS_BLOCK_GROUP_RAID5;
 	else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
 		allowed = BTRFS_BLOCK_GROUP_RAID10;
 	else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
 		allowed = BTRFS_BLOCK_GROUP_RAID1;
+	else if (allowed & BTRFS_BLOCK_GROUP_DUP)
+		allowed = BTRFS_BLOCK_GROUP_DUP;
 	else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
 		allowed = BTRFS_BLOCK_GROUP_RAID0;
 
@@ -1633,11 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
 
-	trace_btrfs_add_unused_block_group(bg);
 	spin_lock(&fs_info->unused_bgs_lock);
 	if (list_empty(&bg->bg_list)) {
 		btrfs_get_block_group(bg);
+		trace_btrfs_add_unused_block_group(bg);
 		list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
+	} else {
+		/* Pull out the block group from the reclaim_bgs list. */
+		list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
 }
@@ -1791,8 +1801,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 		}
 		spin_unlock(&bg->lock);
 
-		/* Get out fast, in case we're unmounting the filesystem */
-		if (btrfs_fs_closing(fs_info)) {
+		/*
+		 * Get out fast, in case we're read-only or unmounting the
+		 * filesystem. It is OK to drop block groups from the list even
+		 * for the read-only case. As we did sb_start_write(),
+		 * "mount -o remount,ro" won't happen and read-only filesystem
+		 * means it is forced read-only due to a fatal error. So, it
+		 * never gets back to read-write to let us reclaim again.
+		 */
+		if (btrfs_need_cleaner_sleep(fs_info)) {
 			up_write(&space_info->groups_sem);
 			goto next;
 		}
@@ -1823,11 +1840,27 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 		}
 
 next:
+		if (ret)
+			btrfs_mark_bg_to_reclaim(bg);
 		btrfs_put_block_group(bg);
+
+		mutex_unlock(&fs_info->reclaim_bgs_lock);
+		/*
+		 * Reclaiming all the block groups in the list can take really
+		 * long.  Prioritize cleaning up unused block groups.
+		 */
+		btrfs_delete_unused_bgs(fs_info);
+		/*
+		 * If we are interrupted by a balance, we can just bail out. The
+		 * cleaner thread restart again if necessary.
+		 */
+		if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
+			goto end;
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
+end:
 	btrfs_exclop_finish(fs_info);
 	sb_end_write(fs_info->sb);
 }
@@ -3521,9 +3554,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			spin_unlock(&cache->lock);
 			spin_unlock(&space_info->lock);
 
-			set_extent_dirty(&trans->transaction->pinned_extents,
-					 bytenr, bytenr + num_bytes - 1,
-					 GFP_NOFS | __GFP_NOFAIL);
+			set_extent_bit(&trans->transaction->pinned_extents,
+				       bytenr, bytenr + num_bytes - 1,
+				       EXTENT_DIRTY, NULL);
 		}
 
 		spin_lock(&trans->transaction->dirty_bgs_lock);
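
The reclaim-loop rework above drops reclaim_bgs_lock around the unused-block-group cleanup and re-enters it with a trylock, so a concurrent balance never waits on the worker. A generic userspace sketch of that drop-then-trylock pattern (all names illustrative, not kernel code):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t reclaim_lock = PTHREAD_MUTEX_INITIALIZER;
	static int remaining = 3;

	static bool have_work(void) { return remaining > 0; }
	static void reclaim_one(void) { remaining--; }
	static void cleanup_unused(void) { /* e.g. delete unused groups */ }

	static void reclaim_worker(void)
	{
		pthread_mutex_lock(&reclaim_lock);
		while (have_work()) {
			reclaim_one();
			/* Drop the lock so competing work can run first. */
			pthread_mutex_unlock(&reclaim_lock);
			cleanup_unused();
			/* If a competitor (a "balance") holds the lock, bail
			 * out; the worker is simply restarted later. */
			if (pthread_mutex_trylock(&reclaim_lock) != 0)
				return;
		}
		pthread_mutex_unlock(&reclaim_lock);
	}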