Commit d7b9416f authored by Christoph Hellwig's avatar Christoph Hellwig Committed by David Sterba
Browse files

btrfs: remove btrfs_end_io_wq



All reads bio that go through btrfs_map_bio need to be completed in
user context.  And read I/Os are the most common and timing critical
in almost any file system workloads.

Embed a work_struct into struct btrfs_bio and use it to complete all
read bios submitted through btrfs_map, using the REQ_META flag to decide
which workqueue they are placed on.

This removes the need for a separate 128 byte allocation (typically
rounded up to 192 bytes by slab) for all reads with a size increase
of 24 bytes for struct btrfs_bio.  Future patches will reorganize
struct btrfs_bio to make use of this extra space for writes as well.

(All sizes are based a on typical 64-bit non-debug build)

Reviewed-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 08a6f464
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -931,10 +931,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
			sums += fs_info->csum_size * nr_sectors;

			ASSERT(comp_bio->bi_iter.bi_size);
			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
						  BTRFS_WQ_ENDIO_DATA);
			if (ret)
				goto finish_cb;
			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
			if (ret)
				goto finish_cb;
+2 −2
Original line number Diff line number Diff line
@@ -850,8 +850,8 @@ struct btrfs_fs_info {
	struct btrfs_workqueue *hipri_workers;
	struct btrfs_workqueue *delalloc_workers;
	struct btrfs_workqueue *flush_workers;
	struct btrfs_workqueue *endio_workers;
	struct btrfs_workqueue *endio_meta_workers;
	struct workqueue_struct *endio_workers;
	struct workqueue_struct *endio_meta_workers;
	struct workqueue_struct *endio_raid56_workers;
	struct workqueue_struct *rmw_workers;
	struct workqueue_struct *compressed_write_workers;
+7 −113
Original line number Diff line number Diff line
@@ -51,7 +51,6 @@
				 BTRFS_SUPER_FLAG_METADUMP |\
				 BTRFS_SUPER_FLAG_METADUMP_V2)

static void end_workqueue_fn(struct btrfs_work *work);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
				      struct btrfs_fs_info *fs_info);
@@ -64,40 +63,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);

/*
 * btrfs_end_io_wq structs are used to do processing in task context when an IO
 * is complete.  This is used during reads to verify checksums, and it is used
 * by writes to insert metadata for new file extents after IO is complete.
 */
struct btrfs_end_io_wq {
	struct bio *bio;
	bio_end_io_t *end_io;
	void *private;
	struct btrfs_fs_info *info;
	blk_status_t status;
	enum btrfs_wq_endio_type metadata;
	struct btrfs_work work;
};

static struct kmem_cache *btrfs_end_io_wq_cache;

int __init btrfs_end_io_wq_init(void)
{
	btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
					sizeof(struct btrfs_end_io_wq),
					0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_end_io_wq_cache)
		return -ENOMEM;
	return 0;
}

void __cold btrfs_end_io_wq_exit(void)
{
	kmem_cache_destroy(btrfs_end_io_wq_cache);
}

static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{
	if (fs_info->csum_shash)
@@ -740,48 +705,6 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
	return ret;
}

static void end_workqueue_bio(struct bio *bio)
{
	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
	struct btrfs_fs_info *fs_info;
	struct btrfs_workqueue *wq;

	fs_info = end_io_wq->info;
	end_io_wq->status = bio->bi_status;

	if (end_io_wq->metadata)
		wq = fs_info->endio_meta_workers;
	else
		wq = fs_info->endio_workers;

	btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
	btrfs_queue_work(wq, &end_io_wq->work);
}

blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			enum btrfs_wq_endio_type metadata)
{
	struct btrfs_end_io_wq *end_io_wq;

	if (WARN_ON_ONCE(btrfs_op(bio) != BTRFS_MAP_WRITE))
		return BLK_STS_IOERR;

	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
	if (!end_io_wq)
		return BLK_STS_RESOURCE;

	end_io_wq->private = bio->bi_private;
	end_io_wq->end_io = bio->bi_end_io;
	end_io_wq->info = info;
	end_io_wq->status = 0;
	end_io_wq->bio = bio;
	end_io_wq->metadata = metadata;

	bio->bi_private = end_io_wq;
	bio->bi_end_io = end_workqueue_bio;
	return 0;
}

static void run_one_async_start(struct btrfs_work *work)
{
	struct async_submit_bio *async;
@@ -917,13 +840,6 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
	bio->bi_opf |= REQ_META;

	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
		/*
		 * called for a read, do the setup so that checksum validation
		 * can happen in the async kernel threads
		 */
		ret = btrfs_bio_wq_end_io(fs_info, bio,
					  BTRFS_WQ_ENDIO_METADATA);
		if (!ret)
		ret = btrfs_map_bio(fs_info, bio, mirror_num);
	} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
		ret = btree_csum_one_bio(bio);
@@ -1947,25 +1863,6 @@ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
	return root;
}

/*
 * called by the kthread helper functions to finally call the bio end_io
 * functions.  This is where read checksum verification actually happens
 */
static void end_workqueue_fn(struct btrfs_work *work)
{
	struct bio *bio;
	struct btrfs_end_io_wq *end_io_wq;

	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
	bio = end_io_wq->bio;

	bio->bi_status = end_io_wq->status;
	bio->bi_private = end_io_wq->private;
	bio->bi_end_io = end_io_wq->end_io;
	bio_endio(bio);
	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
}

static int cleaner_kthread(void *arg)
{
	struct btrfs_fs_info *fs_info = arg;
@@ -2272,7 +2169,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
	btrfs_destroy_workqueue(fs_info->delalloc_workers);
	btrfs_destroy_workqueue(fs_info->hipri_workers);
	btrfs_destroy_workqueue(fs_info->workers);
	btrfs_destroy_workqueue(fs_info->endio_workers);
	if (fs_info->endio_workers)
		destroy_workqueue(fs_info->endio_workers);
	if (fs_info->endio_raid56_workers)
		destroy_workqueue(fs_info->endio_raid56_workers);
	if (fs_info->rmw_workers)
@@ -2292,7 +2190,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
	 * the queues used for metadata I/O, since tasks from those other work
	 * queues can do metadata I/O operations.
	 */
	btrfs_destroy_workqueue(fs_info->endio_meta_workers);
	if (fs_info->endio_meta_workers)
		destroy_workqueue(fs_info->endio_meta_workers);
}

static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2471,15 +2370,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
	fs_info->fixup_workers =
		btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);

	/*
	 * endios are largely parallel and should have a very
	 * low idle thresh
	 */
	fs_info->endio_workers =
		btrfs_alloc_workqueue(fs_info, "endio", flags, max_active, 4);
		alloc_workqueue("btrfs-endio", flags, max_active);
	fs_info->endio_meta_workers =
		btrfs_alloc_workqueue(fs_info, "endio-meta", flags,
				      max_active, 4);
		alloc_workqueue("btrfs-endio-meta", flags, max_active);
	fs_info->endio_raid56_workers =
		alloc_workqueue("btrfs-endio-raid56", flags, max_active);
	fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
+0 −10
Original line number Diff line number Diff line
@@ -17,12 +17,6 @@
 */
#define BTRFS_BDEV_BLOCKSIZE	(4096)

enum btrfs_wq_endio_type {
	BTRFS_WQ_ENDIO_DATA,
	BTRFS_WQ_ENDIO_METADATA,
	BTRFS_WQ_ENDIO_FREE_SPACE,
};

static inline u64 btrfs_sb_offset(int mirror)
{
	u64 start = SZ_16K;
@@ -120,8 +114,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
			  int atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
			     int level, struct btrfs_key *first_key);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			enum btrfs_wq_endio_type metadata);
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
				 int mirror_num, u64 dio_file_offset,
				 extent_submit_bio_start_t *submit_bio_start);
@@ -144,8 +136,6 @@ int btree_lock_page_hook(struct page *page, void *data,
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_init_root_free_objectid(struct btrfs_root *root);
int __init btrfs_end_io_wq_init(void);
void __cold btrfs_end_io_wq_exit(void);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(u64 objectid,
+1 −23
Original line number Diff line number Diff line
@@ -2640,12 +2640,6 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
		return;
	}

	ret = btrfs_bio_wq_end_io(fs_info, bio,
			btrfs_is_free_space_inode(BTRFS_I(inode)) ?
			BTRFS_WQ_ENDIO_FREE_SPACE : BTRFS_WQ_ENDIO_DATA);
	if (ret)
		goto out;

	/*
	 * Lookup bio sums does extra checks around whether we need to csum or
	 * not, which is why we ignore skip_sum here.
@@ -7879,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,

	BUG_ON(bio_op(bio) == REQ_OP_WRITE);

	if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
		return;

	refcount_inc(&dip->refs);
	if (btrfs_map_bio(fs_info, bio, mirror_num))
		refcount_dec(&dip->refs);
@@ -7970,19 +7961,12 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_private *dip = bio->bi_private;
	bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
	blk_status_t ret;

	if (!write) {
		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
		if (ret)
			return ret;
	}

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		goto map;

	if (write) {
	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
		/* Check btrfs_submit_data_write_bio() for async submit rules */
		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
@@ -10314,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
			return ret;
	}

	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
	if (ret) {
		btrfs_bio_free_csum(bbio);
		return ret;
	}

	atomic_inc(&priv->pending);
	ret = btrfs_map_bio(fs_info, bio, mirror_num);
	if (ret) {
Loading