Commit 7d4e87e9 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'splice-net-some-miscellaneous-msg_splice_pages-changes'

David Howells says:

====================
splice, net: Some miscellaneous MSG_SPLICE_PAGES changes

Now that the splice_to_socket() has been rewritten so that nothing now uses
the ->sendpage() file op[1], some further changes can be made, so here are
some miscellaneous changes that can now be done.

 (1) Remove the ->sendpage() file op.

 (2) Remove hash_sendpage*() from AF_ALG.

 (3) Make sunrpc send multiple pages in single sendmsg() call rather than
     calling sendpage() in TCP (or maybe TLS).

 (4) Make tcp_bpf_sendpage() a wrapper around tcp_bpf_sendmsg().

 (5) Make AF_KCM use sendmsg() when calling down to TCP and then make it
     send entire fragment lists in single sendmsg calls.

Link: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=fd5f4d7da29218485153fd8b4c08da7fc130c79f [1]
====================

Link: https://lore.kernel.org/r/20230609100221.2620633-1-dhowells@redhat.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents ccbe64be c31a25e1
Loading
Loading
Loading
Loading
+0 −66
Original line number Diff line number Diff line
@@ -161,58 +161,6 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
	goto unlock;
}

static ssize_t hash_sendpage(struct socket *sock, struct page *page,
			     int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct alg_sock *ask = alg_sk(sk);
	struct hash_ctx *ctx = ask->private;
	int err;

	if (flags & MSG_SENDPAGE_NOTLAST)
		flags |= MSG_MORE;

	lock_sock(sk);
	sg_init_table(ctx->sgl.sgl, 1);
	sg_set_page(ctx->sgl.sgl, page, size, offset);

	if (!(flags & MSG_MORE)) {
		err = hash_alloc_result(sk, ctx);
		if (err)
			goto unlock;
	} else if (!ctx->more)
		hash_free_result(sk, ctx);

	ahash_request_set_crypt(&ctx->req, ctx->sgl.sgl, ctx->result, size);

	if (!(flags & MSG_MORE)) {
		if (ctx->more)
			err = crypto_ahash_finup(&ctx->req);
		else
			err = crypto_ahash_digest(&ctx->req);
	} else {
		if (!ctx->more) {
			err = crypto_ahash_init(&ctx->req);
			err = crypto_wait_req(err, &ctx->wait);
			if (err)
				goto unlock;
		}

		err = crypto_ahash_update(&ctx->req);
	}

	err = crypto_wait_req(err, &ctx->wait);
	if (err)
		goto unlock;

	ctx->more = flags & MSG_MORE;

unlock:
	release_sock(sk);

	return err ?: size;
}

static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			int flags)
{
@@ -328,7 +276,6 @@ static struct proto_ops algif_hash_ops = {

	.release	=	af_alg_release,
	.sendmsg	=	hash_sendmsg,
	.sendpage	=	hash_sendpage,
	.recvmsg	=	hash_recvmsg,
	.accept		=	hash_accept,
};
@@ -380,18 +327,6 @@ static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
	return hash_sendmsg(sock, msg, size);
}

static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
				   int offset, size_t size, int flags)
{
	int err;

	err = hash_check_key(sock);
	if (err)
		return err;

	return hash_sendpage(sock, page, offset, size, flags);
}

static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
			      size_t ignored, int flags)
{
@@ -430,7 +365,6 @@ static struct proto_ops algif_hash_ops_nokey = {

	.release	=	af_alg_release,
	.sendmsg	=	hash_sendmsg_nokey,
	.sendpage	=	hash_sendpage_nokey,
	.recvmsg	=	hash_recvmsg_nokey,
	.accept		=	hash_accept_nokey,
};
+0 −1
Original line number Diff line number Diff line
@@ -1790,7 +1790,6 @@ struct file_operations {
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
+5 −6
Original line number Diff line number Diff line
@@ -161,16 +161,15 @@ static inline bool svc_put_not_last(struct svc_serv *serv)
extern u32 svc_max_payload(const struct svc_rqst *rqstp);

/*
 * RPC Requsts and replies are stored in one or more pages.
 * RPC Requests and replies are stored in one or more pages.
 * We maintain an array of pages for each server thread.
 * Requests are copied into these pages as they arrive.  Remaining
 * pages are available to write the reply into.
 *
 * Pages are sent using ->sendpage so each server thread needs to
 * allocate more to replace those used in sending.  To help keep track
 * of these pages we have a receive list where all pages initialy live,
 * and a send list where pages are moved to when there are to be part
 * of a reply.
 * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
 * needs to allocate more to replace those used in sending.  To help keep track
 * of these pages we have a receive list where all pages initialy live, and a
 * send list where pages are moved to when there are to be part of a reply.
 *
 * We use xdr_buf for holding responses as it fits well with NFS
 * read responses (that have a header, and some data pages, and possibly
+1 −1
Original line number Diff line number Diff line
@@ -47,9 +47,9 @@ struct kcm_stats {

struct kcm_tx_msg {
	unsigned int sent;
	unsigned int fragidx;
	unsigned int frag_offset;
	unsigned int msg_flags;
	bool started_tx;
	struct sk_buff *frag_skb;
	struct sk_buff *last_skb;
};
+9 −40
Original line number Diff line number Diff line
@@ -568,49 +568,18 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
			    size_t size, int flags)
{
	struct sk_msg tmp, *msg = NULL;
	int err = 0, copied = 0;
	struct sk_psock *psock;
	bool enospc = false;

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return tcp_sendpage(sk, page, offset, size, flags);

	lock_sock(sk);
	if (psock->cork) {
		msg = psock->cork;
	} else {
		msg = &tmp;
		sk_msg_init(msg);
	}
	struct bio_vec bvec;
	struct msghdr msg = {
		.msg_flags = flags | MSG_SPLICE_PAGES,
	};

	/* Catch case where ring is full and sendpage is stalled. */
	if (unlikely(sk_msg_full(msg)))
		goto out_err;
	bvec_set_page(&bvec, page, size, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);

	sk_msg_page_add(msg, page, size, offset);
	sk_mem_charge(sk, size);
	copied = size;
	if (sk_msg_full(msg))
		enospc = true;
	if (psock->cork_bytes) {
		if (size > psock->cork_bytes)
			psock->cork_bytes = 0;
		else
			psock->cork_bytes -= size;
		if (psock->cork_bytes && !enospc)
			goto out_err;
		/* All cork bytes are accounted, rerun the prog. */
		psock->eval = __SK_NONE;
		psock->cork_bytes = 0;
	}
	if (flags & MSG_SENDPAGE_NOTLAST)
		msg.msg_flags |= MSG_MORE;

	err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
out_err:
	release_sock(sk);
	sk_psock_put(sk, psock);
	return copied ? copied : err;
	return tcp_bpf_sendmsg(sk, &msg, size);
}

enum {
Loading