Commit 3b629f8d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-bio-cache.5-2021-08-30' of git://git.kernel.dk/linux-block

Pull support for struct bio recycling from Jens Axboe:
 "This adds bio recycling support for polled IO, allowing quick reuse of
  a bio for high IOPS scenarios via a percpu bio_set list.

  It's good for almost a 10% improvement in performance, bumping our
  per-core IO limit from ~3.2M IOPS to ~3.5M IOPS"

* tag 'io_uring-bio-cache.5-2021-08-30' of git://git.kernel.dk/linux-block:
  bio: improve kerneldoc documentation for bio_alloc_kiocb()
  block: provide bio_clear_hipri() helper
  block: use the percpu bio cache in __blkdev_direct_IO
  io_uring: enable use of bio alloc cache
  block: clear BIO_PERCPU_CACHE flag if polling isn't supported
  bio: add allocation cache abstraction
  fs: add kiocb alloc cache flag
  bio: optimize initialization of a bio
parents c547d89a 3d5b3fbe
Loading
Loading
Loading
Loading
+153 −16
Original line number Diff line number Diff line
@@ -25,6 +25,11 @@
#include "blk.h"
#include "blk-rq-qos.h"

/*
 * Per-CPU cache of recycled bios belonging to one bio_set (allocated with
 * alloc_percpu() when the bio_set is created with BIOSET_PERCPU_CACHE).
 */
struct bio_alloc_cache {
	struct bio_list		free_list;	/* bios parked for reuse */
	unsigned int		nr;		/* number of bios on free_list */
};

static struct biovec_slab {
	int nr_vecs;
	char *name;
@@ -246,12 +251,40 @@ static void bio_free(struct bio *bio)
/*
 * Bring @bio into its initial state.
 *
 * @bio:      bio to initialise
 * @table:    bio_vec array to attach; stored in bio->bi_io_vec
 * @max_vecs: capacity of @table; stored in bio->bi_max_vecs
 *
 * Every listed field is reset, both __bi_remaining and __bi_cnt start at 1,
 * and bi_pool is cleared so the caller can assign the owning bio_set.
 */
void bio_init(struct bio *bio, struct bio_vec *table,
	      unsigned short max_vecs)
{
	/*
	 * NOTE(review): this memset already zeroes everything the explicit
	 * stores below reset, so one of the two is redundant. Kept because
	 * struct bio's full field list is not visible from here; confirm
	 * the per-field list is complete before dropping the memset.
	 */
	memset(bio, 0, sizeof(*bio));
	bio->bi_next = NULL;
	bio->bi_bdev = NULL;
	bio->bi_opf = 0;
	bio->bi_flags = 0;
	bio->bi_ioprio = 0;
	bio->bi_write_hint = 0;
	bio->bi_status = 0;
	bio->bi_iter.bi_sector = 0;
	bio->bi_iter.bi_size = 0;
	bio->bi_iter.bi_idx = 0;
	bio->bi_iter.bi_bvec_done = 0;
	bio->bi_end_io = NULL;
	bio->bi_private = NULL;
#ifdef CONFIG_BLK_CGROUP
	bio->bi_blkg = NULL;
	bio->bi_issue.value = 0;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	bio->bi_iocost_cost = 0;
#endif
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	bio->bi_crypt_context = NULL;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
	bio->bi_integrity = NULL;
#endif
	bio->bi_vcnt = 0;

	atomic_set(&bio->__bi_remaining, 1);
	atomic_set(&bio->__bi_cnt, 1);

	/* bi_io_vec is assigned exactly once (the earlier duplicate store was dead) */
	bio->bi_max_vecs = max_vecs;
	bio->bi_io_vec = table;
	bio->bi_pool = NULL;
}
EXPORT_SYMBOL(bio_init);

@@ -586,6 +619,53 @@ void guard_bio_eod(struct bio *bio)
	bio_truncate(bio, maxsector << 9);
}

#define ALLOC_CACHE_MAX		512
#define ALLOC_CACHE_SLACK	 64

/*
 * Release bios from @cache's free list back via bio_free().
 *
 * Stops when the list runs empty or once @nr bios have been freed,
 * whichever comes first. Callers pass -1U to drain the whole list.
 * cache->nr is decremented for every bio taken off the list.
 */
static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
				  unsigned int nr)
{
	unsigned int freed = 0;
	struct bio *victim;

	for (;;) {
		victim = bio_list_pop(&cache->free_list);
		if (!victim)
			return;

		cache->nr--;
		bio_free(victim);

		freed++;
		if (freed == nr)
			return;
	}
}

/*
 * CPUHP_BIO_DEAD callback: empty the bio cache that belonged to @cpu.
 *
 * @node is the cpuhp_dead member embedded in a bio_set. If that bio_set
 * has a per-cpu cache, every bio parked on @cpu's free list is freed.
 * Always returns 0.
 */
static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct bio_set *bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead);

	if (!bs->cache)
		return 0;

	bio_alloc_cache_prune(per_cpu_ptr(bs->cache, cpu), -1U);
	return 0;
}

static void bio_alloc_cache_destroy(struct bio_set *bs)
{
	int cpu;

	if (!bs->cache)
		return;

	cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
	for_each_possible_cpu(cpu) {
		struct bio_alloc_cache *cache;

		cache = per_cpu_ptr(bs->cache, cpu);
		bio_alloc_cache_prune(cache, -1U);
	}
	free_percpu(bs->cache);
}

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
@@ -596,15 +676,22 @@ void guard_bio_eod(struct bio *bio)
 **/
void bio_put(struct bio *bio)
{
	/*
	 * Only reference-counted bios need the counter dance: bail out
	 * unless this put dropped the last reference. Bios without
	 * BIO_REFFED are released on their first put.
	 */
	if (unlikely(bio_flagged(bio, BIO_REFFED))) {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
		if (!atomic_dec_and_test(&bio->__bi_cnt))
			return;
	}

	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
		struct bio_alloc_cache *cache;

		/* recycle: uninit the bio and park it on this CPU's free list */
		bio_uninit(bio);
		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
		bio_list_add_head(&cache->free_list, bio);
		/*
		 * Once the list grows past ALLOC_CACHE_MAX by more than
		 * ALLOC_CACHE_SLACK, hand ALLOC_CACHE_SLACK bios back.
		 */
		if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
			bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
		put_cpu();
	} else {
		bio_free(bio);
	}
}
@@ -1496,6 +1583,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries)
 */
void bioset_exit(struct bio_set *bs)
{
	bio_alloc_cache_destroy(bs);
	if (bs->rescue_workqueue)
		destroy_workqueue(bs->rescue_workqueue);
	bs->rescue_workqueue = NULL;
@@ -1557,12 +1645,18 @@ int bioset_init(struct bio_set *bs,
	    biovec_init_pool(&bs->bvec_pool, pool_size))
		goto bad;

	if (!(flags & BIOSET_NEED_RESCUER))
		return 0;

	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
	if (flags & BIOSET_NEED_RESCUER) {
		bs->rescue_workqueue = alloc_workqueue("bioset",
							WQ_MEM_RECLAIM, 0);
		if (!bs->rescue_workqueue)
			goto bad;
	}
	if (flags & BIOSET_PERCPU_CACHE) {
		bs->cache = alloc_percpu(struct bio_alloc_cache);
		if (!bs->cache)
			goto bad;
		cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead);
	}

	return 0;
bad:
@@ -1589,6 +1683,46 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
}
EXPORT_SYMBOL(bioset_init_from_src);

/**
 * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
 * @kiocb:	kiocb describing the IO
 * @nr_vecs:	number of iovecs to pre-allocate
 * @bs:		bio_set to allocate from
 *
 * Description:
 *    Like bio_alloc_bioset(), but pass in the kiocb. The kiocb is only
 *    used to check if we should dip into the per-cpu bio_set allocation
 *    cache: that path is taken when IOCB_ALLOC_CACHE is set in
 *    @kiocb->ki_flags, @bs has a per-cpu cache and @nr_vecs does not
 *    exceed BIO_INLINE_VECS. Otherwise this is a plain
 *    bio_alloc_bioset() call. The allocation uses GFP_KERNEL internally.
 *    A bio obtained through the cache path is marked BIO_PERCPU_CACHE,
 *    and the final put of such a bio MUST be done from process context,
 *    not hard/soft IRQ.
 *
 * Return: the bio, or NULL if bio_alloc_bioset() returned NULL.
 */
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
			    struct bio_set *bs)
{
	struct bio_alloc_cache *cache;
	struct bio *bio;

	/* no cache requested, no cache available, or too big for inline vecs */
	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || !bs->cache ||
	    nr_vecs > BIO_INLINE_VECS)
		return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);

	cache = per_cpu_ptr(bs->cache, get_cpu());
	bio = bio_list_pop(&cache->free_list);
	if (bio) {
		cache->nr--;
		put_cpu();
		/* recycled bio: reinitialise, then restore pool and cache flag */
		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
		bio->bi_pool = bs;
		bio_set_flag(bio, BIO_PERCPU_CACHE);
		return bio;
	}
	put_cpu();

	/* cache empty: allocate fresh and mark it so bio_put() recycles it */
	bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
	if (bio)
		bio_set_flag(bio, BIO_PERCPU_CACHE);
	return bio;
}
EXPORT_SYMBOL_GPL(bio_alloc_kiocb);

static int __init init_bio(void)
{
	int i;
@@ -1603,6 +1737,9 @@ static int __init init_bio(void)
				SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	}

	cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL,
					bio_cpu_dead);

	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
		panic("bio: can't allocate bios\n");

+1 −1
Original line number Diff line number Diff line
@@ -821,7 +821,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
	}

	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		bio->bi_opf &= ~REQ_HIPRI;
		bio_clear_hipri(bio);

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
+1 −1
Original line number Diff line number Diff line
@@ -285,7 +285,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
	 * iopoll in direct IO routine. Given performance gain of iopoll for
	 * big IO can be trivial, disable iopoll when split needed.
	 */
	bio->bi_opf &= ~REQ_HIPRI;
	bio_clear_hipri(bio);

	return bio_split(bio, sectors, GFP_NOIO, bs);
}
+7 −0
Original line number Diff line number Diff line
@@ -366,4 +366,11 @@ extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;

/*
 * Turn off polled IO for @bio: drops REQ_HIPRI from bi_opf and, because
 * the alloc cache can't be supported without polling, also clears
 * BIO_PERCPU_CACHE.
 */
static inline void bio_clear_hipri(struct bio *bio)
{
	bio->bi_opf &= ~REQ_HIPRI;
	/* without polling this bio must not go back into the alloc cache */
	bio_clear_flag(bio, BIO_PERCPU_CACHE);
}

#endif /* BLK_INTERNAL_H */
+4 −2
Original line number Diff line number Diff line
@@ -386,7 +386,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -514,7 +514,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)

/*
 * Initialise blkdev_dio_pool, the bio_set behind __blkdev_direct_IO().
 *
 * The pool holds 4 entries, places each bio at its offset inside
 * struct blkdev_dio, and is created with BIOSET_NEED_BVECS. It also
 * needs BIOSET_PERCPU_CACHE because __blkdev_direct_IO() allocates
 * from it through bio_alloc_kiocb(), which uses the per-cpu cache.
 *
 * Returns the result of bioset_init().
 */
static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);

Loading