Commit d3acb15a authored by Linus Torvalds
Pull iov_iter updates from Al Viro:
 "iov_iter cleanups and fixes.

  There are followups, but this is what had sat in -next this cycle. IMO
  the macro forest in there became much thinner and easier to follow..."

* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (37 commits)
  csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller
  clean up copy_mc_pipe_to_iter()
  pipe_zero(): we don't need no stinkin' kmap_atomic()...
  iov_iter: clean csum_and_copy_...() primitives up a bit
  copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases
  copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases
  iterate_xarray(): only on the first iteration we might get offset != 0
  pull handling of ->iov_offset into iterate_{iovec,bvec,xarray}
  iov_iter: make iterator callbacks use base and len instead of iovec
  iov_iter: make the amount already copied available to iterator callbacks
  iov_iter: get rid of separate bvec and xarray callbacks
  iov_iter: teach iterate_{bvec,xarray}() about possible short copies
  iterate_bvec(): expand bvec.h macro forest, massage a bit
  iov_iter: unify iterate_iovec and iterate_kvec
  iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec
  iterate_and_advance(): get rid of magic in case when n is 0
  csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter()
  iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant
  [xarray] iov_iter_npages(): just use DIV_ROUND_UP()
  iov_iter_npages(): don't bother with iterate_all_kinds()
  ...
parents f92a322a 6852df12
Documentation/filesystems/porting.rst +9 −0
@@ -890,3 +890,12 @@ been called or returned with non -EIOCBQUEUED code.
 
 mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
 whereas previously it could be paired with mnt_drop_write() as well.
+
+---
+
+**mandatory**
+
+iov_iter_copy_from_user_atomic() is gone; use copy_page_from_iter_atomic().
+The difference is copy_page_from_iter_atomic() advances the iterator and
+you don't need iov_iter_advance() after it.  However, if you decide to use
+only a part of obtained data, you should do iov_iter_revert().
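
As an illustration of the conversion described above (not part of the patch itself), here is a minimal sketch of a page-at-a-time copy step; the helper name, the "used" parameter and the surrounding loop are hypothetical:

#include <linux/pagemap.h>
#include <linux/uio.h>

/*
 * Hypothetical helper, for illustration only: copy up to @bytes into @page
 * at @offset, then keep only @used bytes of what was actually copied.
 */
static size_t copy_step_sketch(struct page *page, struct iov_iter *i,
			       unsigned int offset, size_t bytes, size_t used)
{
	size_t copied;

	/*
	 * Old pattern:
	 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
	 *	iov_iter_advance(i, copied);
	 */
	copied = copy_page_from_iter_atomic(page, offset, bytes, i);

	/* The iterator has already been advanced by @copied bytes. */
	if (used < copied) {
		/* Give back the part of the data we decided not to use. */
		iov_iter_revert(i, copied - used);
		copied = used;
	}
	return copied;
}

The hunks below are the in-tree callers being converted to this pattern.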
fs/btrfs/file.c +11 −12
@@ -399,7 +399,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 		/*
 		 * Copy data from userspace to the current page
 		 */
-		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+		copied = copy_page_from_iter_atomic(page, offset, count, i);
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
@@ -413,20 +413,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 		 * The rest of the btrfs_file_write code will fall
 		 * back to page at a time copies after we return 0.
 		 */
-		if (!PageUptodate(page) && copied < count)
-			copied = 0;
+		if (unlikely(copied < count)) {
+			if (!PageUptodate(page)) {
+				iov_iter_revert(i, copied);
+				copied = 0;
+			}
+			if (!copied)
+				break;
+		}
 
-		iov_iter_advance(i, copied);
 		write_bytes -= copied;
 		total_copied += copied;
-
-		/* Return to btrfs_file_write_iter to fault page */
-		if (unlikely(copied == 0))
-			break;
-
-		if (copied < PAGE_SIZE - offset) {
-			offset += copied;
-		} else {
+		offset += copied;
+		if (offset == PAGE_SIZE) {
 			pg++;
 			offset = 0;
 		}
fs/fuse/file.c +1 −3
@@ -1171,14 +1171,12 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
 
-		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+		tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
 		flush_dcache_page(page);
 
-		iov_iter_advance(ii, tmp);
 		if (!tmp) {
 			unlock_page(page);
 			put_page(page);
-			bytes = min(bytes, iov_iter_single_seg_count(ii));
 			goto again;
 		}
 
fs/iomap/buffered-io.c +15 −20
@@ -746,10 +746,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		 * Otherwise there's a nasty deadlock on copying from the
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
 		 */
 		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 			status = -EFAULT;
@@ -764,30 +760,29 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		if (mapping_writably_mapped(inode->i_mapping))
 			flush_dcache_page(page);
 
-		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
 
-		copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
 				srcmap);
 
-		cond_resched();
+		if (unlikely(copied != status))
+			iov_iter_revert(i, copied - status);
 
-		iov_iter_advance(i, copied);
-		if (unlikely(copied == 0)) {
+		cond_resched();
+		if (unlikely(status == 0)) {
 			/*
-			 * If we were unable to copy any data at all, we must
-			 * fall back to a single segment length write.
-			 *
-			 * If we didn't fallback here, we could livelock
-			 * because not all segments in the iov can be copied at
-			 * once without a pagefault.
+			 * A short copy made iomap_write_end() reject the
+			 * thing entirely.  Might be memory poisoning
+			 * halfway through, might be a race with munmap,
+			 * might be severe memory pressure.
 			 */
-			bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_single_seg_count(i));
+			if (copied)
+				bytes = copied;
 			goto again;
 		}
-		pos += copied;
-		written += copied;
-		length -= copied;
+		pos += status;
+		written += status;
+		length -= status;
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 	} while (iov_iter_count(i) && length);
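
A side note on the copied/status split above (illustrative only, not from the patch): copy_page_from_iter_atomic() advances the iterator by everything it copied, while iomap_write_end() may accept less than that, so the caller reverts the difference. A hypothetical helper capturing just that step, assuming status never exceeds copied:

#include <linux/uio.h>

/*
 * Hypothetical helper, for illustration only: reconcile what the copy took
 * from the iterator (@copied) with what the write path accepted (@status).
 * Assumes status <= copied.
 */
static size_t reconcile_short_write(struct iov_iter *i, size_t copied,
				    size_t status)
{
	if (copied != status)
		iov_iter_revert(i, copied - status);
	return status;
}

When status ends up being zero while copied was not, the loop above retries the same page with bytes clamped down to copied, instead of the old iov_iter_single_seg_count() fallback.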
fs/ntfs/file.c +10 −23
@@ -1684,20 +1684,17 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
 {
 	struct page **last_page = pages + nr_pages;
 	size_t total = 0;
-	struct iov_iter data = *i;
 	unsigned len, copied;
 
 	do {
 		len = PAGE_SIZE - ofs;
 		if (len > bytes)
 			len = bytes;
-		copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
-				len);
+		copied = copy_page_from_iter_atomic(*pages, ofs, len, i);
 		total += copied;
 		bytes -= copied;
 		if (!bytes)
 			break;
-		iov_iter_advance(&data, copied);
 		if (copied < len)
 			goto err;
 		ofs = 0;
@@ -1866,34 +1863,24 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
 		if (likely(copied == bytes)) {
 			status = ntfs_commit_pages_after_write(pages, do_pages,
 					pos, bytes);
 			if (!status)
 				status = bytes;
 		}
 		do {
 			unlock_page(pages[--do_pages]);
 			put_page(pages[do_pages]);
 		} while (do_pages);
-		if (unlikely(status < 0))
+		if (unlikely(status < 0)) {
+			iov_iter_revert(i, copied);
 			break;
-		copied = status;
+		}
 		cond_resched();
-		if (unlikely(!copied)) {
-			size_t sc;
-
-			/*
-			 * We failed to copy anything.  Fall back to single
-			 * segment length write.
-			 *
-			 * This is needed to avoid possible livelock in the
-			 * case that all segments in the iov cannot be copied
-			 * at once without a pagefault.
-			 */
-			sc = iov_iter_single_seg_count(i);
-			if (bytes > sc)
-				bytes = sc;
+		if (unlikely(copied < bytes)) {
+			iov_iter_revert(i, copied);
+			if (copied)
+				bytes = copied;
+			else if (bytes > PAGE_SIZE - ofs)
+				bytes = PAGE_SIZE - ofs;
 			goto again;
 		}
-		iov_iter_advance(i, copied);
 		pos += copied;
 		written += copied;
 		balance_dirty_pages_ratelimited(mapping);