Commit 8fb0f47a authored by Jens Axboe's avatar Jens Axboe
Browse files

iov_iter: add helper to save iov_iter state

In an ideal world, when someone is passed an iov_iter and returns X bytes,
then X bytes would have been consumed/advanced from the iov_iter. But we
have use cases that always consume the entire iterator, a few examples
of that are iomap and bdev O_DIRECT. This means we cannot rely on the
state of the iov_iter once we've called ->read_iter() or ->write_iter().

This would be easier if we didn't always have to deal with truncate of
the iov_iter, as rewinding would be trivial without that. We recently
added a commit to track the truncate state, but that grew the iov_iter
by 8 bytes and wasn't the best solution.

Implement a helper to save enough of the iov_iter state to sanely restore
it after we've called the read/write iterator helpers. This currently
only works for IOVEC/BVEC/KVEC as that's all we need, support for other
iterator types are left as an exercise for the reader.

Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wiacKV4Gh-MYjteU0LwNBSGpWrK-Ov25HdqB1ewinrFPg@mail.gmail.com/


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent d6c338a7
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -27,6 +27,12 @@ enum iter_type {
	ITER_DISCARD,
};

struct iov_iter_state {
	size_t iov_offset;
	size_t count;
	unsigned long nr_segs;
};

struct iov_iter {
	u8 iter_type;
	bool data_source;
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
	return i->iter_type;
}

static inline void iov_iter_save_state(struct iov_iter *iter,
				       struct iov_iter_state *state)
{
	state->iov_offset = iter->iov_offset;
	state->count = iter->count;
	state->nr_segs = iter->nr_segs;
}

static inline bool iter_is_iovec(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_IOVEC;
@@ -233,6 +247,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
			size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);

+36 −0
Original line number Diff line number Diff line
@@ -1972,3 +1972,39 @@ int import_single_range(int rw, void __user *buf, size_t len,
	return 0;
}
EXPORT_SYMBOL(import_single_range);

/**
 * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
 *     iov_iter_save_state() was called.
 *
 * @i: &struct iov_iter to restore
 * @state: state to restore from
 *
 * Used after iov_iter_save_state() to bring restore @i, if operations may
 * have advanced it.
 *
 * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
 */
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
{
	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
			 !iov_iter_is_kvec(i))
		return;
	i->iov_offset = state->iov_offset;
	i->count = state->count;
	/*
	 * For the *vec iters, nr_segs + iov is constant - if we increment
	 * the vec, then we also decrement the nr_segs count. Hence we don't
	 * need to track both of these, just one is enough and we can deduct
	 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
	 * size, so we can just increment the iov pointer as they are unionzed.
	 * ITER_BVEC _may_ be the same size on some archs, but on others it is
	 * not. Be safe and handle it separately.
	 */
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	if (iov_iter_is_bvec(i))
		i->bvec -= state->nr_segs - i->nr_segs;
	else
		i->iov -= state->nr_segs - i->nr_segs;
	i->nr_segs = state->nr_segs;
}