Commit 5298953e, authored by Pavel Begunkov and committed by Jakub Kicinski
Browse files

udp6: don't make extra copies of iflow



udpv6_sendmsg() first initialises an on-stack 88-byte struct flowi6 and then
copies it into the cork, which is expensive. Avoid the copy in the corkless
case by initialising the on-stack cork.fl directly.

The main part is a couple of lines under the !corkreq check. The rest
converts the fl6 variable into a pointer.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent f37a4cc6
Loading
Loading
Loading
Loading
+42 −43
Original line number Diff line number Diff line
@@ -1294,7 +1294,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	struct ipv6_txoptions *opt = NULL;
	struct ipv6_txoptions *opt_to_free = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
	struct flowi6 fl6;
	struct inet_cork_full cork;
	struct flowi6 *fl6 = &cork.fl.u.ip6;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int addr_len = msg->msg_namelen;
@@ -1384,19 +1385,19 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	}
	ulen += sizeof(struct udphdr);

	memset(&fl6, 0, sizeof(fl6));
	memset(fl6, 0, sizeof(*fl6));

	if (sin6) {
		if (sin6->sin6_port == 0)
			return -EINVAL;

		fl6.fl6_dport = sin6->sin6_port;
		fl6->fl6_dport = sin6->sin6_port;
		daddr = &sin6->sin6_addr;

		if (np->sndflow) {
			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
				if (IS_ERR(flowlabel))
					return -EINVAL;
			}
@@ -1413,24 +1414,24 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    sin6->sin6_scope_id &&
		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
			fl6.flowi6_oif = sin6->sin6_scope_id;
			fl6->flowi6_oif = sin6->sin6_scope_id;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;

		fl6.fl6_dport = inet->inet_dport;
		fl6->fl6_dport = inet->inet_dport;
		daddr = &sk->sk_v6_daddr;
		fl6.flowlabel = np->flow_label;
		fl6->flowlabel = np->flow_label;
		connected = true;
	}

	if (!fl6.flowi6_oif)
		fl6.flowi6_oif = sk->sk_bound_dev_if;
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = sk->sk_bound_dev_if;

	if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;

	fl6.flowi6_uid = sk->sk_uid;
	fl6->flowi6_uid = sk->sk_uid;

	if (msg->msg_controllen) {
		opt = &opt_space;
@@ -1440,14 +1441,14 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
		if (err > 0)
			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
						    &ipc6);
		if (err < 0) {
			fl6_sock_release(flowlabel);
			return err;
		}
		if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
		if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
		}
@@ -1464,16 +1465,17 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	opt = ipv6_fixup_options(&opt_space, opt);
	ipc6.opt = opt;

	fl6.flowi6_proto = sk->sk_protocol;
	fl6.flowi6_mark = ipc6.sockc.mark;
	fl6.daddr = *daddr;
	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
		fl6.saddr = np->saddr;
	fl6.fl6_sport = inet->inet_sport;
	fl6->flowi6_proto = sk->sk_protocol;
	fl6->flowi6_mark = ipc6.sockc.mark;
	fl6->daddr = *daddr;
	if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
		fl6->saddr = np->saddr;
	fl6->fl6_sport = inet->inet_sport;

	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
					   (struct sockaddr *)sin6, &fl6.saddr);
					   (struct sockaddr *)sin6,
					   &fl6->saddr);
		if (err)
			goto out_no_dst;
		if (sin6) {
@@ -1489,32 +1491,32 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
				err = -EINVAL;
				goto out_no_dst;
			}
			fl6.fl6_dport = sin6->sin6_port;
			fl6.daddr = sin6->sin6_addr;
			fl6->fl6_dport = sin6->sin6_port;
			fl6->daddr = sin6->sin6_addr;
		}
	}

	if (ipv6_addr_any(&fl6.daddr))
		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
	if (ipv6_addr_any(&fl6->daddr))
		fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */

	final_p = fl6_update_dst(&fl6, opt, &final);
	final_p = fl6_update_dst(fl6, opt, &final);
	if (final_p)
		connected = false;

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
		fl6.flowi6_oif = np->mcast_oif;
	if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
		fl6->flowi6_oif = np->mcast_oif;
		connected = false;
	} else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;
	} else if (!fl6->flowi6_oif)
		fl6->flowi6_oif = np->ucast_oif;

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	if (ipc6.tclass < 0)
		ipc6.tclass = np->tclass;

	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
	fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);

	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		dst = NULL;
@@ -1522,7 +1524,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	}

	if (ipc6.hlimit < 0)
		ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
		ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
@@ -1530,18 +1532,15 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

	/* Lockless fast path for the non-corking case */
	if (!corkreq) {
		struct inet_cork_full cork;
		struct sk_buff *skb;

		cork.fl.u.ip6 = fl6;

		skb = ip6_make_skb(sk, getfrag, msg, ulen,
				   sizeof(struct udphdr), &ipc6,
				   (struct rt6_info *)dst,
				   msg->msg_flags, &cork);
		err = PTR_ERR(skb);
		if (!IS_ERR_OR_NULL(skb))
			err = udp_v6_send_skb(skb, &fl6, &cork.base);
			err = udp_v6_send_skb(skb, fl6, &cork.base);
		goto out;
	}

@@ -1563,7 +1562,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		ipc6.dontfrag = np->dontfrag;
	up->len += ulen;
	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
			      &ipc6, &fl6, (struct rt6_info *)dst,
			      &ipc6, fl6, (struct rt6_info *)dst,
			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_v6_flush_pending_frames(sk);
@@ -1598,7 +1597,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

do_confirm:
	if (msg->msg_flags & MSG_PROBE)
		dst_confirm_neigh(dst, &fl6.daddr);
		dst_confirm_neigh(dst, &fl6->daddr);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;