Commit 57d44a35, authored by David Howells, committed by Jakub Kicinski
Browse files

unix: Convert unix_stream_sendpage() to use MSG_SPLICE_PAGES



Convert unix_stream_sendpage() to use sendmsg() with MSG_SPLICE_PAGES
rather than directly splicing in the pages itself.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kuniyuki Iwashima <kuniyu@amazon.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent a0dbf5f8
Loading
Loading
Loading
Loading
+7 −127
Original line number Diff line number Diff line
@@ -1839,24 +1839,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
	}
}

/*
 * Prepare @scm for a send that carries no ancillary (control) data.
 *
 * @scm:    cookie to initialise.
 * @socket: sending socket, passed through to scm_send() and
 *          unix_passcred_enabled().
 * @other:  peer sock, only consulted by unix_passcred_enabled().
 *
 * Returns 0 on success, or the non-zero value returned by scm_send().
 *
 * NOTE(review): in this diff view the helper appears to be among the
 * removed lines; the only caller visible here is the old body of
 * unix_stream_sendpage().
 */
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	/* Dummy header with an empty control buffer: there is no cmsg to parse. */
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	/* Attach the caller's pid/uid/gid only when credential passing is on. */
	if (unix_passcred_enabled(socket, other)) {
		/* NOTE(review): takes a pid reference; presumably dropped by scm_destroy() in the caller - confirm. */
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	/* err is 0 here: the non-zero case returned above. */
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
@@ -2292,117 +2274,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/*
 * Send @size bytes of @page, starting at @offset, on a connected AF_UNIX
 * stream socket.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * removed and the added function bodies are interleaved and the text below
 * is NOT one coherent implementation.  Going by the "+7 -127" summary and
 * the commit message, the added lines appear to be:
 *   - the declarations of 'bvec' and 'msg',
 *   - the MSG_SENDPAGE_NOTLAST -> MSG_MORE translation,
 *   - the final bvec_set_page() / iov_iter_bvec() / unix_stream_sendmsg()
 *     sequence.
 * Everything else appears to be the removed hand-rolled path that appended
 * the page directly to an skb on the peer's receive queue.  Confirm against
 * the upstream tree before relying on this split.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/*
	 * Never entered by fall-through; only reached via 'goto alloc_skb'
	 * below.  Both locks are dropped before allocating, then execution
	 * continues into the lock acquisition that follows this block.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}
	/* NOTE(review): the next two declarations appear to be added (+) lines. */
	struct bio_vec bvec;
	struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);
	/* NOTE(review): appears to be an added (+) statement: "not last" becomes MSG_MORE. */
	if (flags & MSG_SENDPAGE_NOTLAST)
		msg.msg_flags |= MSG_MORE;

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Initialise scm once only; init_scm stays false across alloc_skb retries. */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/*
	 * Choose the skb to append to.  'tail' holds the queue tail recorded
	 * just before a jump to alloc_skb; if it is still the tail, or if the
	 * queue is empty / the tail's scm data differs, use the new skb.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	/* Non-zero return: retry with a freshly allocated skb. */
	if (skb_append_pagefrags(skb, page, offset, size, MAX_SKB_FRAGS)) {
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes on the skb and on the sender's sk_wmem_alloc. */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	/* A new skb still needs its scm data attached and must be queued on the peer. */
	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

	/* Error unwinding: each label releases one more thing than the one below it. */
err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	/* scm was initialised only if init_scm was cleared above. */
	if (!init_scm)
		scm_destroy(&scm);
	return err;
	/*
	 * NOTE(review): the three statements below appear to be the added (+)
	 * body: describe the page range as a one-element bvec, point
	 * msg.msg_iter at it, and pass the message to unix_stream_sendmsg().
	 */
	bvec_set_page(&bvec, page, size, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
	return unix_stream_sendmsg(socket, &msg, size);
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,