Commit d58f2e15 authored by Eric Dumazet, committed by David S. Miller

tcp: set TCP_USER_TIMEOUT locklessly



icsk->icsk_user_timeout can be set locklessly,
if all read sides use READ_ONCE().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d44fd4a7
3 changed files (the file list was lost in page extraction; from the hunk contents the diffs below are include/linux/tcp.h, net/ipv4/tcp.c and net/ipv4/tcp_timer.c)

+1 −1 (include/linux/tcp.h)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -564,6 +564,6 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+int tcp_sock_set_user_timeout(struct sock *sk, int val);
 
 #endif	/* _LINUX_TCP_H */
+10 −13 (net/ipv4/tcp.c)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3296,11 +3296,16 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_syncnt);
 
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
 {
-	lock_sock(sk);
+	/* Cap the max time in ms TCP will retry or probe the window
+	 * before giving up and aborting (ETIMEDOUT) a connection.
+	 */
+	if (val < 0)
+		return -EINVAL;
+
 	WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
-	release_sock(sk);
+	return 0;
 }
 EXPORT_SYMBOL(tcp_sock_set_user_timeout);

@@ -3464,6 +3469,8 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 	switch (optname) {
 	case TCP_SYNCNT:
 		return tcp_sock_set_syncnt(sk, val);
+	case TCP_USER_TIMEOUT:
+		return tcp_sock_set_user_timeout(sk, val);
 	}
 
 	sockopt_lock_sock(sk);
@@ -3611,16 +3618,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
 		break;
 #endif
-	case TCP_USER_TIMEOUT:
-		/* Cap the max time in ms TCP will retry or probe the window
-		 * before giving up and aborting (ETIMEDOUT) a connection.
-		 */
-		if (val < 0)
-			err = -EINVAL;
-		else
-			WRITE_ONCE(icsk->icsk_user_timeout, val);
-		break;
-
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
 		    TCPF_LISTEN))) {
+22 −15 (net/ipv4/tcp_timer.c)
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
 static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 elapsed, start_ts;
+	u32 elapsed, start_ts, user_timeout;
 	s32 remaining;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (!icsk->icsk_user_timeout)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout)
 		return icsk->icsk_rto;
 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-	remaining = icsk->icsk_user_timeout - elapsed;
+	remaining = user_timeout - elapsed;
 	if (remaining <= 0)
 		return 1; /* user timeout has passed; fire ASAP */
 
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 remaining;
+	u32 remaining, user_timeout;
 	s32 elapsed;
 
-	if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+	if (!user_timeout || !icsk->icsk_probes_tstamp)
 		return when;
 
 	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
 	if (unlikely(elapsed < 0))
 		elapsed = 0;
-	remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+	remaining = msecs_to_jiffies(user_timeout) - elapsed;
 	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
 
 	return min_t(u32, remaining, when);
@@ -270,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
 	}
 	if (!expired)
 		expired = retransmits_timed_out(sk, retry_until,
-						icsk->icsk_user_timeout);
+						READ_ONCE(icsk->icsk_user_timeout));
 	tcp_fastopen_active_detect_blackhole(sk, expired);
 
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -384,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	if (!icsk->icsk_probes_tstamp)
+	if (!icsk->icsk_probes_tstamp) {
 		icsk->icsk_probes_tstamp = tcp_jiffies32;
-	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
-		 msecs_to_jiffies(icsk->icsk_user_timeout))
-		goto abort;
-
+	} else {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
+		if (user_timeout &&
+		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+		     msecs_to_jiffies(user_timeout))
+			goto abort;
+	}
 	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
 	if (sock_flag(sk, SOCK_DEAD)) {
 		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -734,13 +739,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
 	elapsed = keepalive_time_elapsed(tp);
 
 	if (elapsed >= keepalive_time_when(tp)) {
+		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
 		/* If the TCP_USER_TIMEOUT option is enabled, use that
 		 * to determine when to timeout instead.
 		 */
-		if ((icsk->icsk_user_timeout != 0 &&
-		    elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+		if ((user_timeout != 0 &&
+		    elapsed >= msecs_to_jiffies(user_timeout) &&
 		    icsk->icsk_probes_out > 0) ||
-		    (icsk->icsk_user_timeout == 0 &&
+		    (user_timeout == 0 &&
 		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			tcp_write_err(sk);