Commit f5518257 authored by Geliang Tang
Browse files

inet: implement lockless IP_TOS

mainline inclusion
from mainline-v6.7-rc1
commit e08d0b3d172311e2bb500865c0d0038533d0ff11
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9VYQ9
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=e08d0b3d172311e2bb500865c0d0038533d0ff11



--------------------------------

Some reads of inet->tos are racy.

Add the needed READ_ONCE() annotations and make the IP_TOS option lockless.

v2: missing changes in include/net/route.h (David Ahern)
rebased: on "mptcp: fix setsockopt(IP_TOS) subflow locking" (Geliang)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
parent 86cc03e9
Loading
Loading
Loading
Loading
+11 −9
Original line number Diff line number Diff line
@@ -587,12 +587,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)

void __ip_sock_set_tos(struct sock *sk, int val)
{
	u8 old_tos = inet_sk(sk)->tos;

	if (sk->sk_type == SOCK_STREAM) {
		val &= ~INET_ECN_MASK;
		val |= inet_sk(sk)->tos & INET_ECN_MASK;
		val |= old_tos & INET_ECN_MASK;
	}
	if (inet_sk(sk)->tos != val) {
		inet_sk(sk)->tos = val;
	if (old_tos != val) {
		WRITE_ONCE(inet_sk(sk)->tos, val);
		WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
		sk_dst_reset(sk);
	}
@@ -1050,6 +1052,9 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
		return 0;
	case IP_MTU_DISCOVER:
		return ip_sock_set_mtu_discover(sk, val);
	case IP_TOS:	/* This sets both TOS and Precedence */
		ip_sock_set_tos(sk, val);
		return 0;
	}

	err = 0;
@@ -1104,9 +1109,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
			}
		}
		break;
	case IP_TOS:	/* This sets both TOS and Precedence */
		__ip_sock_set_tos(sk, val);
		break;
	case IP_UNICAST_IF:
	{
		struct net_device *dev = NULL;
@@ -1595,6 +1597,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
	case IP_MTU_DISCOVER:
		val = READ_ONCE(inet->pmtudisc);
		goto copyval;
	case IP_TOS:
		val = READ_ONCE(inet->tos);
		goto copyval;
	}

	if (needs_rtnl)
@@ -1631,9 +1636,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
			return -EFAULT;
		return 0;
	}
	case IP_TOS:
		val = inet->tos;
		break;
	case IP_MTU:
	{
		struct dst_entry *dst;
+5 −4
Original line number Diff line number Diff line
@@ -1024,10 +1024,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(inet_sk(sk)->tos & INET_ECN_MASK) :
				inet_sk(sk)->tos;
		tos = READ_ONCE(inet_sk(sk)->tos);

		if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
			tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
			      (tos & INET_ECN_MASK);

		if (!INET_ECN_is_capable(tos) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))