Commit 0a96bbe4 authored by Bijan Mottahedeh's avatar Bijan Mottahedeh Committed by Jens Axboe
Browse files

io_uring: modularize io_sqe_buffer_register



Split io_sqe_buffer_register into two routines:

- io_sqe_buffer_register() registers a single buffer
- io_sqe_buffers_register iterates over all user specified buffers

Reviewed-by: default avatarPavel Begunkov <asml.silence@gmail.com>
Signed-off-by: default avatarBijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 3a81fd02
Loading
Loading
Loading
Loading
+107 −103
Original line number Diff line number Diff line
@@ -8370,7 +8370,7 @@ static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
	return pages;
}

static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
	int i, j;

@@ -8488,85 +8488,45 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
	return ret;
}

static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
				  unsigned nr_args)
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
				  struct io_mapped_ubuf *imu,
				  struct page **last_hpage)
{
	struct vm_area_struct **vmas = NULL;
	struct page **pages = NULL;
	struct page *last_hpage = NULL;
	int i, j, got_pages = 0;
	int ret = -EINVAL;

	if (ctx->user_bufs)
		return -EBUSY;
	if (!nr_args || nr_args > UIO_MAXIOV)
		return -EINVAL;

	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
					GFP_KERNEL);
	if (!ctx->user_bufs)
		return -ENOMEM;

	for (i = 0; i < nr_args; i++) {
		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
	unsigned long off, start, end, ubuf;
		int pret, nr_pages;
		struct iovec iov;
	size_t size;
	int ret, pret, nr_pages, i;

		ret = io_copy_iov(ctx, &iov, arg, i);
		if (ret)
			goto err;

		/*
		 * Don't impose further limits on the size and buffer
		 * constraints here, we'll -EINVAL later when IO is
		 * submitted if they are wrong.
		 */
		ret = -EFAULT;
		if (!iov.iov_base || !iov.iov_len)
			goto err;

		/* arbitrary limit, but we need something */
		if (iov.iov_len > SZ_1G)
			goto err;

		ubuf = (unsigned long) iov.iov_base;
		end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	ubuf = (unsigned long) iov->iov_base;
	end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	start = ubuf >> PAGE_SHIFT;
	nr_pages = end - start;

		ret = 0;
		if (!pages || nr_pages > got_pages) {
			kvfree(vmas);
			kvfree(pages);
			pages = kvmalloc_array(nr_pages, sizeof(struct page *),
						GFP_KERNEL);
			vmas = kvmalloc_array(nr_pages,
					sizeof(struct vm_area_struct *),
					GFP_KERNEL);
			if (!pages || !vmas) {
	ret = -ENOMEM;
				goto err;
			}
			got_pages = nr_pages;
		}

	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		goto done;

	vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *),
			      GFP_KERNEL);
	if (!vmas)
		goto done;

	imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
				   GFP_KERNEL);
		ret = -ENOMEM;
	if (!imu->bvec)
			goto err;
		goto done;

	ret = 0;
	mmap_read_lock(current->mm);
		pret = pin_user_pages(ubuf, nr_pages,
				      FOLL_WRITE | FOLL_LONGTERM,
	pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
			      pages, vmas);
	if (pret == nr_pages) {
		/* don't support file backed memory */
			for (j = 0; j < nr_pages; j++) {
				struct vm_area_struct *vma = vmas[j];
		for (i = 0; i < nr_pages; i++) {
			struct vm_area_struct *vma = vmas[i];

			if (vma->vm_file &&
			    !is_file_hugepages(vma->vm_file)) {
@@ -8586,42 +8546,86 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
		if (pret > 0)
			unpin_user_pages(pages, pret);
		kvfree(imu->bvec);
			goto err;
		goto done;
	}

		ret = io_buffer_account_pin(ctx, pages, pret, imu, &last_hpage);
	ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
	if (ret) {
		unpin_user_pages(pages, pret);
		kvfree(imu->bvec);
			goto err;
		goto done;
	}

	off = ubuf & ~PAGE_MASK;
		size = iov.iov_len;
		for (j = 0; j < nr_pages; j++) {
	size = iov->iov_len;
	for (i = 0; i < nr_pages; i++) {
		size_t vec_len;

		vec_len = min_t(size_t, size, PAGE_SIZE - off);
			imu->bvec[j].bv_page = pages[j];
			imu->bvec[j].bv_len = vec_len;
			imu->bvec[j].bv_offset = off;
		imu->bvec[i].bv_page = pages[i];
		imu->bvec[i].bv_len = vec_len;
		imu->bvec[i].bv_offset = off;
		off = 0;
		size -= vec_len;
	}
	/* store original address for later verification */
	imu->ubuf = ubuf;
		imu->len = iov.iov_len;
	imu->len = iov->iov_len;
	imu->nr_bvecs = nr_pages;
	ret = 0;
done:
	kvfree(pages);
	kvfree(vmas);
	return ret;
}

static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
				   unsigned int nr_args)
{
	int i, ret;
	struct iovec iov;
	struct page *last_hpage = NULL;

	if (ctx->user_bufs)
		return -EBUSY;
	if (!nr_args || nr_args > UIO_MAXIOV)
		return -EINVAL;

	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
					GFP_KERNEL);
	if (!ctx->user_bufs)
		return -ENOMEM;

	for (i = 0; i < nr_args; i++) {
		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];

		ret = io_copy_iov(ctx, &iov, arg, i);
		if (ret)
			break;

		/*
		 * Don't impose further limits on the size and buffer
		 * constraints here, we'll -EINVAL later when IO is
		 * submitted if they are wrong.
		 */
		ret = -EFAULT;
		if (!iov.iov_base || !iov.iov_len)
			break;

		/* arbitrary limit, but we need something */
		if (iov.iov_len > SZ_1G)
			break;

		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
		if (ret)
			break;

		ctx->nr_user_bufs++;
	}
	kvfree(pages);
	kvfree(vmas);
	return 0;
err:
	kvfree(pages);
	kvfree(vmas);
	io_sqe_buffer_unregister(ctx);

	if (ret)
		io_sqe_buffers_unregister(ctx);

	return ret;
}

@@ -8675,7 +8679,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
	io_finish_async(ctx);
	io_sqe_buffer_unregister(ctx);
	io_sqe_buffers_unregister(ctx);

	if (ctx->sqo_task) {
		put_task_struct(ctx->sqo_task);
@@ -10057,13 +10061,13 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = io_sqe_buffer_register(ctx, arg, nr_args);
		ret = io_sqe_buffers_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffer_unregister(ctx);
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = io_sqe_files_register(ctx, arg, nr_args);