iomap: support IOCB_DIO_CALLER_COMP (8c052fb3) · Commits · EulixOS / Software / Kernel

fs/iomap/direct-io.c

+60 −2

Original line number	Diff line number	Diff line
		@@ -20,6 +20,7 @@
		* Private flags for iomap_dio, must not overlap with the public ones in
		* iomap.h:
		*/
		/*
		 * Completion may be handed back to the issuer: set for writes when the
		 * kiocb carries IOCB_DIO_CALLER_COMP, and cleared again if completion
		 * would need zeroing, a sync that cannot use FUA, or the write starts
		 * at or beyond i_size.
		 */
		#define IOMAP_DIO_CALLER_COMP (1U << 26)
		/*
		 * NOTE(review): presumably marks a dio that can be completed inline
		 * from bio end_io; here it is only tested together with CALLER_COMP
		 * to decide whether IOCB_HIPRI may stay set -- confirm against the
		 * full file.
		 */
		#define IOMAP_DIO_INLINE_COMP (1U << 27)
		/*
		 * Write-through was requested; FUA writes are used only when this is
		 * set, the mapping is neither shared nor dirty, and the device supports
		 * FUA or has no volatile write cache.
		 */
		#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
		/*
		 * Completion needs a sync; when FUA cannot be used this disables
		 * caller-side completion (IOMAP_DIO_CALLER_COMP is cleared).
		 */
		#define IOMAP_DIO_NEED_SYNC (1U << 29)
		@@ -132,6 +133,11 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
		}
		EXPORT_SYMBOL_GPL(iomap_dio_complete);

		/*
		 * Trampoline with the untyped signature used for iocb->dio_complete.
		 * @data is the struct iomap_dio stashed in iocb->private; the return
		 * value is whatever iomap_dio_complete() yields for that dio.
		 */
		static ssize_t iomap_dio_deferred_complete(void *data)
		{
			struct iomap_dio *dio = data;

			return iomap_dio_complete(dio);
		}

		static void iomap_dio_complete_work(struct work_struct *work)
		{
		struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
		@@ -182,6 +188,31 @@ void iomap_dio_bio_end_io(struct bio *bio)
		goto release_bio;
		}

		/*
		* If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
		* our completion that way to avoid an async punt to a workqueue.
		*/
		if (dio->flags & IOMAP_DIO_CALLER_COMP) {
		/* only polled IO cares about ->private being cleared, so reuse it */
		iocb->private = dio;
		iocb->dio_complete = iomap_dio_deferred_complete;

		/*
		* Invoke ->ki_complete() directly. We've assigned our
		* dio_complete callback handler, and since the issuer set
		* IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
		* notice ->dio_complete being set and will defer calling that
		* handler until it can be done from a safe task context.
		*
		* Note that the 'res' being passed in here is not important
		* for this case. The actual completion value of the request
		* will be gotten from dio_complete when that is run by the
		* issuer.
		*/
		iocb->ki_complete(iocb, 0);
		goto release_bio;
		}

		/*
		* Async DIO completion that requires filesystem level completion work
		* gets punted to a work queue to complete as the operation may require
		@@ -278,12 +309,17 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
		* after IO completion such as unwritten extent conversion) and
		* the underlying device either supports FUA or doesn't have
		* a volatile write cache. This allows us to avoid cache flushes
		* on IO completion.
		* on IO completion. If we can't use writethrough and need to
		* sync, disable in-task completions as dio completion will
		* need to call generic_write_sync() which will do a blocking
		* fsync / cache flush call.
		*/
		if (!(iomap->flags & (IOMAP_F_SHARED\|IOMAP_F_DIRTY)) &&
		(dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
		(bdev_fua(iomap->bdev) \|\| !bdev_write_cache(iomap->bdev)))
		use_fua = true;
		else if (dio->flags & IOMAP_DIO_NEED_SYNC)
		dio->flags &= ~IOMAP_DIO_CALLER_COMP;
		}

		/*
		@@ -298,10 +334,23 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
		goto out;

		/*
		* We can only poll for single bio I/Os.
		* We can only do deferred completion for pure overwrites that
		* don't require additional IO at completion. This rules out
		* writes that need zeroing or extent conversion, extend
		* the file size, or issue journal IO or cache flushes
		* during completion processing.
		*/
		if (need_zeroout \|\|
		((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) \|\|
		((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
		dio->flags &= ~IOMAP_DIO_CALLER_COMP;

		/*
		* The rules for polled IO completions follow the same guidelines as
		* the ones we set for inline and deferred completions. If neither of
		* those is available for this IO, clear the polled flag.
		*/
		if (!(dio->flags & (IOMAP_DIO_INLINE_COMP\|IOMAP_DIO_CALLER_COMP)))
		dio->iocb->ki_flags &= ~IOCB_HIPRI;

		if (need_zeroout) {
		@@ -547,6 +596,15 @@ __iomap_dio_rw(struct kiocb iocb, struct iov_iter iter,
		iomi.flags \|= IOMAP_WRITE;
		dio->flags \|= IOMAP_DIO_WRITE;

		/*
		* Flag as supporting deferred completions, if the issuer
		* groks it. This can avoid a workqueue punt for writes.
		* We may later clear this flag if we need to do other IO
		* as part of this IO completion.
		*/
		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
		dio->flags \|= IOMAP_DIO_CALLER_COMP;

		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
		ret = -EAGAIN;
		if (iomi.pos >= dio->i_size \|\|