Commit 1ded5e5a authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

net: annotate data-races around sock->ops



IPV6_ADDRFORM socket option is evil, because it can change sock->ops
while other threads might read it. Same issue for sk->sk_family
being set to AF_INET.

Adding READ_ONCE() over sock->ops reads is needed for sockets
that might be impacted by IPV6_ADDRFORM.

Note that mptcp_is_tcpsk() can also overwrite sock->ops.

Adding annotations for all sk->sk_family reads will require
more patches :/

BUG: KCSAN: data-race in ____sys_sendmsg / do_ipv6_setsockopt

write to 0xffff888109f24ca0 of 8 bytes by task 4470 on cpu 0:
do_ipv6_setsockopt+0x2c5e/0x2ce0 net/ipv6/ipv6_sockglue.c:491
ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
udpv6_setsockopt+0x95/0xa0 net/ipv6/udp.c:1690
sock_common_setsockopt+0x61/0x70 net/core/sock.c:3663
__sys_setsockopt+0x1c3/0x230 net/socket.c:2273
__do_sys_setsockopt net/socket.c:2284 [inline]
__se_sys_setsockopt net/socket.c:2281 [inline]
__x64_sys_setsockopt+0x66/0x80 net/socket.c:2281
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888109f24ca0 of 8 bytes by task 4469 on cpu 1:
sock_sendmsg_nosec net/socket.c:724 [inline]
sock_sendmsg net/socket.c:747 [inline]
____sys_sendmsg+0x349/0x4c0 net/socket.c:2503
___sys_sendmsg net/socket.c:2557 [inline]
__sys_sendmmsg+0x263/0x500 net/socket.c:2643
__do_sys_sendmmsg net/socket.c:2672 [inline]
__se_sys_sendmmsg net/socket.c:2669 [inline]
__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2669
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0xffffffff850e32b8 -> 0xffffffff850da890

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 4469 Comm: syz-executor.1 Not tainted 6.4.0-rc5-syzkaller-00313-g4c605260bc60 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/25/2023

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230808135809.2300241-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent e05a53ab
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -123,7 +123,7 @@ struct socket {

	struct file		*file;
	struct sock		*sk;
	const struct proto_ops	*ops;
	const struct proto_ops	*ops; /* Might change with IPV6_ADDRFORM or MPTCP. */

	struct socket_wq	wq;
};
+2 −2
Original line number Diff line number Diff line
@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
		}
	}

	err = csocket->ops->connect(csocket,
	err = READ_ONCE(csocket->ops)->connect(csocket,
				    (struct sockaddr *)&sin_server,
				    sizeof(struct sockaddr_in), 0);
	if (err < 0) {
@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)

		return err;
	}
	err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
	err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
			sizeof(struct sockaddr_un) - 1, 0);
	if (err < 0) {
		pr_err("%s (%d): problem connecting socket: %s: %d\n",
+2 −1
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy);

int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
	const struct proto_ops *ops = READ_ONCE(sock->ops);
	struct cmsghdr *cmsg;
	int err;

@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
		switch (cmsg->cmsg_type)
		{
		case SCM_RIGHTS:
			if (!sock->ops || sock->ops->family != PF_UNIX)
			if (!ops || ops->family != PF_UNIX)
				goto error;
			err=scm_fp_copy(cmsg, &p->fp);
			if (err<0)
+6 −2
Original line number Diff line number Diff line
@@ -1198,13 +1198,17 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
static void sk_psock_verdict_data_ready(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;
	const struct proto_ops *ops;
	int copied;

	trace_sk_data_ready(sk);

	if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
	if (unlikely(!sock))
		return;
	copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
	ops = READ_ONCE(sock->ops);
	if (!ops || !ops->read_skb)
		return;
	copied = ops->read_skb(sk, sk_psock_verdict_recv);
	if (copied >= 0) {
		struct sk_psock *psock;

+17 −7
Original line number Diff line number Diff line
@@ -1277,14 +1277,19 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
		break;

	case SO_RCVLOWAT:
		{
		int (*set_rcvlowat)(struct sock *sk, int val) = NULL;

		if (val < 0)
			val = INT_MAX;
		if (sock && sock->ops->set_rcvlowat)
			ret = sock->ops->set_rcvlowat(sk, val);
		if (sock)
			set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
		if (set_rcvlowat)
			ret = set_rcvlowat(sk, val);
		else
			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
		break;

		}
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
@@ -1379,11 +1384,16 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		{
		int (*set_peek_off)(struct sock *sk, int val);

		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
		if (set_peek_off)
			ret = set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;
		}

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
@@ -1816,7 +1826,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
	{
		struct sockaddr_storage address;

		lv = sock->ops->getname(sock, (struct sockaddr *)&address, 2);
		lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
		if (lv < 0)
			return -ENOTCONN;
		if (lv < len)
@@ -1858,7 +1868,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
		if (!READ_ONCE(sock->ops)->set_peek_off)
			return -EOPNOTSUPP;

		v.val = READ_ONCE(sk->sk_peek_off);
Loading