Commit c3d4ed1a authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Darrick J. Wong
Browse files

iomap: Allow filesystem to call iomap_dio_complete without i_rwsem



This is to avoid the deadlock caused in btrfs because of O_DIRECT |
O_DSYNC.

Filesystems such as btrfs require i_rwsem while performing sync on a
file. iomap_dio_rw() is called under i_rw_sem. This leads to a
deadlock because of:

iomap_dio_complete()
  generic_write_sync()
    btrfs_sync_file()

Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
can call iomap_dio_complete() after unlocking i_rwsem.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Signed-off-by: default avatarGoldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
parent 4595a298
Loading
Loading
Loading
Loading
+26 −9
Original line number Diff line number Diff line
@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
		dio->submit.cookie = submit_bio(bio);
}

static ssize_t iomap_dio_complete(struct iomap_dio *dio)
ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
	const struct iomap_dio_ops *dops = dio->dops;
	struct kiocb *iocb = dio->iocb;
@@ -130,6 +130,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)

	return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);

static void iomap_dio_complete_work(struct work_struct *work)
{
@@ -416,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 * Returns -ENOTBLK In case of a page invalidation invalidation failure for
 * writes.  The callers needs to fall back to buffered I/O in this case.
 */
ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_dio *
__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		bool wait_for_completion)
{
@@ -431,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
	struct iomap_dio *dio;

	if (!count)
		return 0;
		return NULL;

	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
		return -EIO;
		return ERR_PTR(-EIO);

	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
	if (!dio)
		return -ENOMEM;
		return ERR_PTR(-ENOMEM);

	dio->iocb = iocb;
	atomic_set(&dio->ref, 1);
@@ -568,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
	dio->wait_for_completion = wait_for_completion;
	if (!atomic_dec_and_test(&dio->ref)) {
		if (!wait_for_completion)
			return -EIOCBQUEUED;
			return ERR_PTR(-EIOCBQUEUED);

		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
@@ -584,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		__set_current_state(TASK_RUNNING);
	}

	return iomap_dio_complete(dio);
	return dio;

out_free_dio:
	kfree(dio);
	return ret;
	if (ret)
		return ERR_PTR(ret);
	return NULL;
}
EXPORT_SYMBOL_GPL(__iomap_dio_rw);

ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		bool wait_for_completion)
{
	struct iomap_dio *dio;

	dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
	if (IS_ERR_OR_NULL(dio))
		return PTR_ERR_OR_ZERO(dio);
	return iomap_dio_complete(dio);
}
EXPORT_SYMBOL_GPL(iomap_dio_rw);
+5 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
struct address_space;
struct fiemap_extent_info;
struct inode;
struct iomap_dio;
struct iomap_writepage_ctx;
struct iov_iter;
struct kiocb;
@@ -258,6 +259,10 @@ struct iomap_dio_ops {
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		bool wait_for_completion);
struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
		bool wait_for_completion);
ssize_t iomap_dio_complete(struct iomap_dio *dio);
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);

#ifdef CONFIG_SWAP