Commit 09ce8744 authored by Jens Axboe's avatar Jens Axboe
Browse files

block: use flags instead of bit fields for blkdev_dio



This generates a lot better code for me, and bumps performance from
7650K IOPS to 7750K IOPS. Looking at profiles for the run and running
perf diff, it confirms that we're now sending a lot less time there:

     6.38%     -2.80%  [kernel.vmlinux]  [k] blkdev_direct_IO

Taking it from the 2nd most cycle consumer to only the 9th most at
3.35% of the CPU time.

Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent fac7c6d5
Loading
Loading
Loading
Loading
+20 −14
Original line number Diff line number Diff line
@@ -123,6 +123,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
	return ret;
}

enum {
	DIO_MULTI_BIO		= 1,
	DIO_SHOULD_DIRTY	= 2,
	DIO_IS_SYNC		= 4,
};

struct blkdev_dio {
	union {
		struct kiocb		*iocb;
@@ -130,9 +136,7 @@ struct blkdev_dio {
	};
	size_t			size;
	atomic_t		ref;
	bool			multi_bio : 1;
	bool			should_dirty : 1;
	bool			is_sync : 1;
	unsigned int		flags;
	struct bio		bio;
};

@@ -141,13 +145,13 @@ static struct bio_set blkdev_dio_pool;
static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->should_dirty;
	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;

	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
		if (!dio->is_sync) {
	if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) {
		if (!(dio->flags & DIO_IS_SYNC)) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

@@ -161,7 +165,7 @@ static void blkdev_bio_end_io(struct bio *bio)
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			if (dio->multi_bio)
			if (dio->flags & DIO_MULTI_BIO)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;
@@ -198,17 +202,19 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync) {
	is_sync = is_sync_kiocb(iocb);
	if (is_sync) {
		dio->flags = DIO_IS_SYNC;
		dio->waiter = current;
		bio_get(bio);
	} else {
		dio->flags = 0;
		dio->iocb = iocb;
	}

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && iter_is_iovec(iter);
	if (is_read && iter_is_iovec(iter))
		dio->flags |= DIO_SHOULD_DIRTY;

	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
@@ -234,7 +240,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
			if (dio->flags & DIO_SHOULD_DIRTY)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
@@ -255,7 +261,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
				WRITE_ONCE(iocb->private, bio);
			break;
		}
		if (!dio->multi_bio) {
		if (!(dio->flags & DIO_MULTI_BIO)) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
@@ -263,7 +269,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			 */
			if (!is_sync)
				bio_get(bio);
			dio->multi_bio = true;
			dio->flags |= DIO_MULTI_BIO;
			atomic_set(&dio->ref, 2);
			do_poll = false;
		} else {