Commit 7998c0ad authored by Jakub Kicinski
Browse files

Merge branch 'tcp-add-missing-annotations'

Eric Dumazet says:

====================
tcp: add missing annotations

This series was inspired by one syzbot (KCSAN) report.

do_tcp_getsockopt() does not lock the socket, we need to
annotate most of the reads there (and other places as well).

This is a first round, another series will come later.
====================

Link: https://lore.kernel.org/r/20230719212857.3943972-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents ac528649 70f360dd
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -513,7 +513,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);

	queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
	WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
}

static inline void tcp_move_syn(struct tcp_sock *tp,
+24 −7
Original line number Diff line number Diff line
@@ -1509,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
/* Return the effective TCP keepalive probe interval (in jiffies) for @tp:
 * the per-socket value when set, otherwise the netns
 * tcp_keepalive_intvl sysctl.  Callers may run without the socket lock
 * (e.g. do_tcp_getsockopt()), hence the READ_ONCE() annotations.
 */
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);
	int val;

	/* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
	 * and do_tcp_setsockopt().
	 */
	val = READ_ONCE(tp->keepalive_intvl);

	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}

/* Return the effective TCP keepalive idle time (in jiffies) for @tp:
 * the per-socket value when set, otherwise the netns
 * tcp_keepalive_time sysctl.  Readable without the socket lock, hence
 * the READ_ONCE() annotations.
 */
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);
	int val;

	/* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
	val = READ_ONCE(tp->keepalive_time);

	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}

/* Return the effective number of TCP keepalive probes for @tp:
 * the per-socket value when set, otherwise the netns
 * tcp_keepalive_probes sysctl.  Readable without the socket lock, hence
 * the READ_ONCE() annotations.
 */
static inline int keepalive_probes(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);
	int val;

	/* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
	 * and do_tcp_setsockopt().
	 */
	val = READ_ONCE(tp->keepalive_probes);

	return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}

static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -2048,7 +2061,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
/* Return the effective TCP_NOTSENT_LOWAT threshold for @tp:
 * the per-socket value when set, otherwise the netns
 * tcp_notsent_lowat sysctl.  Readable without the socket lock, hence
 * the READ_ONCE() annotations.
 */
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);
	u32 val;

	/* Paired with WRITE_ONCE() in do_tcp_setsockopt() (TCP_NOTSENT_LOWAT) */
	val = READ_ONCE(tp->notsent_lowat);

	return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}

bool tcp_stream_memory_free(const struct sock *sk, int wake);
+1 −1
Original line number Diff line number Diff line
@@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t)

	icsk = inet_csk(sk_listener);
	net = sock_net(sk_listener);
	max_syn_ack_retries = icsk->icsk_syn_retries ? :
	max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
		READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to established socket) for first timeout.
+30 −27
Original line number Diff line number Diff line
@@ -3291,7 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
		return -EINVAL;

	lock_sock(sk);
	inet_csk(sk)->icsk_syn_retries = val;
	WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
	release_sock(sk);
	return 0;
}
@@ -3300,7 +3300,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
{
	lock_sock(sk);
	inet_csk(sk)->icsk_user_timeout = val;
	WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
	release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3312,7 +3312,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
	if (val < 1 || val > MAX_TCP_KEEPIDLE)
		return -EINVAL;

	tp->keepalive_time = val * HZ;
	/* Paired with WRITE_ONCE() in keepalive_time_when() */
	WRITE_ONCE(tp->keepalive_time, val * HZ);
	if (sock_flag(sk, SOCK_KEEPOPEN) &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		u32 elapsed = keepalive_time_elapsed(tp);
@@ -3344,7 +3345,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
		return -EINVAL;

	lock_sock(sk);
	tcp_sk(sk)->keepalive_intvl = val * HZ;
	WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
	release_sock(sk);
	return 0;
}
@@ -3356,7 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
		return -EINVAL;

	lock_sock(sk);
	tcp_sk(sk)->keepalive_probes = val;
	/* Paired with READ_ONCE() in keepalive_probes() */
	WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
	release_sock(sk);
	return 0;
}
@@ -3558,19 +3560,19 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
		if (val < 1 || val > MAX_TCP_KEEPINTVL)
			err = -EINVAL;
		else
			tp->keepalive_intvl = val * HZ;
			WRITE_ONCE(tp->keepalive_intvl, val * HZ);
		break;
	case TCP_KEEPCNT:
		if (val < 1 || val > MAX_TCP_KEEPCNT)
			err = -EINVAL;
		else
			tp->keepalive_probes = val;
			WRITE_ONCE(tp->keepalive_probes, val);
		break;
	case TCP_SYNCNT:
		if (val < 1 || val > MAX_TCP_SYNCNT)
			err = -EINVAL;
		else
			icsk->icsk_syn_retries = val;
			WRITE_ONCE(icsk->icsk_syn_retries, val);
		break;

	case TCP_SAVE_SYN:
@@ -3583,18 +3585,18 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,

	case TCP_LINGER2:
		if (val < 0)
			tp->linger2 = -1;
			WRITE_ONCE(tp->linger2, -1);
		else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
			tp->linger2 = TCP_FIN_TIMEOUT_MAX;
			WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
		else
			tp->linger2 = val * HZ;
			WRITE_ONCE(tp->linger2, val * HZ);
		break;

	case TCP_DEFER_ACCEPT:
		/* Translate value in seconds to number of retransmits */
		icsk->icsk_accept_queue.rskq_defer_accept =
		WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
			   secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
					TCP_RTO_MAX / HZ);
					   TCP_RTO_MAX / HZ));
		break;

	case TCP_WINDOW_CLAMP:
@@ -3618,7 +3620,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
		if (val < 0)
			err = -EINVAL;
		else
			icsk->icsk_user_timeout = val;
			WRITE_ONCE(icsk->icsk_user_timeout, val);
		break;

	case TCP_FASTOPEN:
@@ -3656,13 +3658,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
		if (!tp->repair)
			err = -EPERM;
		else
			tp->tsoffset = val - tcp_time_stamp_raw();
			WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
		break;
	case TCP_REPAIR_WINDOW:
		err = tcp_repair_set_window(tp, optval, optlen);
		break;
	case TCP_NOTSENT_LOWAT:
		tp->notsent_lowat = val;
		WRITE_ONCE(tp->notsent_lowat, val);
		sk->sk_write_space(sk);
		break;
	case TCP_INQ:
@@ -3674,7 +3676,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
	case TCP_TX_DELAY:
		if (val)
			tcp_enable_tx_delay();
		tp->tcp_tx_delay = val;
		WRITE_ONCE(tp->tcp_tx_delay, val);
		break;
	default:
		err = -ENOPROTOOPT;
@@ -3991,17 +3993,18 @@ int do_tcp_getsockopt(struct sock *sk, int level,
		val = keepalive_probes(tp);
		break;
	case TCP_SYNCNT:
		val = icsk->icsk_syn_retries ? :
		val = READ_ONCE(icsk->icsk_syn_retries) ? :
			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
		break;
	case TCP_LINGER2:
		val = tp->linger2;
		val = READ_ONCE(tp->linger2);
		if (val >= 0)
			val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
		break;
	case TCP_DEFER_ACCEPT:
		val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
				      TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
		val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
		val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
				      TCP_RTO_MAX / HZ);
		break;
	case TCP_WINDOW_CLAMP:
		val = tp->window_clamp;
@@ -4138,11 +4141,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
		break;

	case TCP_USER_TIMEOUT:
		val = icsk->icsk_user_timeout;
		val = READ_ONCE(icsk->icsk_user_timeout);
		break;

	case TCP_FASTOPEN:
		val = icsk->icsk_accept_queue.fastopenq.max_qlen;
		val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
		break;

	case TCP_FASTOPEN_CONNECT:
@@ -4154,14 +4157,14 @@ int do_tcp_getsockopt(struct sock *sk, int level,
		break;

	case TCP_TX_DELAY:
		val = tp->tcp_tx_delay;
		val = READ_ONCE(tp->tcp_tx_delay);
		break;

	case TCP_TIMESTAMP:
		val = tcp_time_stamp_raw() + tp->tsoffset;
		val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
		break;
	case TCP_NOTSENT_LOWAT:
		val = tp->notsent_lowat;
		val = READ_ONCE(tp->notsent_lowat);
		break;
	case TCP_INQ:
		val = tp->recvmsg_inq;
+4 −2
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
static bool tcp_fastopen_queue_check(struct sock *sk)
{
	struct fastopen_queue *fastopenq;
	int max_qlen;

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
	if (fastopenq->max_qlen == 0)
	max_qlen = READ_ONCE(fastopenq->max_qlen);
	if (max_qlen == 0)
		return false;

	if (fastopenq->qlen >= fastopenq->max_qlen) {
	if (fastopenq->qlen >= max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
Loading