Commit 40570375 authored by Eric Dumazet, committed by Jakub Kicinski

tcp: add accessors to read/set tp->snd_cwnd

We had various bugs over the years with code
breaking the assumption that tp->snd_cwnd is greater
than zero.

Lately, syzbot reported the WARN_ON_ONCE(!tp->prior_cwnd) added
in commit 8b8a321f ("tcp: fix zero cwnd in tcp_cwnd_reduction")
can trigger, and without a repro we would have to spend
considerable time finding the bug.

Instead of complaining too late, we want to catch where
and when tp->snd_cwnd is set to an illegal value.
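
The check in the new setter is worth spelling out: casting val to a
signed int makes WARN_ON_ONCE() fire both for an explicit zero and for
a huge u32 produced by an unsigned underflow (e.g. a cwnd - acked
computation going below zero). A minimal userspace sketch of that
behaviour, with WARN_ON_ONCE() stubbed out (illustration only, not part
of the commit; the stub fires every time rather than once):

	#include <stdio.h>
	#include <stdint.h>

	/* Stand-in for the kernel macro, for demonstration purposes. */
	#define WARN_ON_ONCE(cond) \
		((cond) ? fprintf(stderr, "WARN: %s\n", #cond) : 0)

	static uint32_t snd_cwnd;

	static void tcp_snd_cwnd_set(uint32_t val)
	{
		WARN_ON_ONCE((int)val <= 0);	/* catches 0 and values >= 2^31 */
		snd_cwnd = val;
	}

	int main(void)
	{
		tcp_snd_cwnd_set(10);		/* fine */
		tcp_snd_cwnd_set(0);		/* warns: zero cwnd */
		tcp_snd_cwnd_set(10 - 11);	/* warns: wraps to 0xffffffff */
		return 0;
	}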

Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Link: https://lore.kernel.org/r/20220405233538.947344-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 487dc3ca
include/net/tcp.h  +15 −4
@@ -1207,9 +1207,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)

#define TCP_INFINITE_SSTHRESH	0x7fffffff

+static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
+{
+	return tp->snd_cwnd;
+}
+
+static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
+{
+	WARN_ON_ONCE((int)val <= 0);
+	tp->snd_cwnd = val;
+}
+
static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
-	return tp->snd_cwnd < tp->snd_ssthresh;
+	return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
}

static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
@@ -1235,8 +1246,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
-			   ((tp->snd_cwnd >> 1) +
-			    (tp->snd_cwnd >> 2)));
+			   ((tcp_snd_cwnd(tp) >> 1) +
+			    (tcp_snd_cwnd(tp) >> 2)));
}

/* Use define here intentionally to get WARN_ON location shown at the caller */
@@ -1278,7 +1289,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)

	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
-		return tp->snd_cwnd < 2 * tp->max_packets_out;
+		return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;

	return tp->is_cwnd_limited;
}
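
With both helpers in place, congestion-control code reads through
tcp_snd_cwnd() and writes through tcp_snd_cwnd_set(), so every store is
checked at the offending call site rather than noticed later. A hedged
sketch of the resulting read-modify-write pattern (the halving logic is
illustrative, not from this commit):

	/* Halve cwnd but never let it reach zero; every write goes
	 * through the checked setter.
	 */
	static void example_halve_cwnd(struct tcp_sock *tp)
	{
		tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1, 1U));
	}

Compound updates follow the same shape: tp->snd_cwnd++ becomes
tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1), which is exactly the
conversion pattern applied in the files below.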
include/trace/events/tcp.h  +1 −1
@@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe,
		__entry->data_len = skb->len - __tcp_hdrlen(th);
		__entry->snd_nxt = tp->snd_nxt;
		__entry->snd_una = tp->snd_una;
-		__entry->snd_cwnd = tp->snd_cwnd;
+		__entry->snd_cwnd = tcp_snd_cwnd(tp);
		__entry->snd_wnd = tp->snd_wnd;
		__entry->rcv_wnd = tp->rcv_wnd;
		__entry->ssthresh = tcp_current_ssthresh(sk);
net/core/filter.c  +1 −1
@@ -5173,7 +5173,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
				if (val <= 0 || tp->data_segs_out > tp->syn_data)
					ret = -EINVAL;
				else
-					tp->snd_cwnd = val;
+					tcp_snd_cwnd_set(tp, val);
				break;
			case TCP_BPF_SNDCWND_CLAMP:
				if (val <= 0) {
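
For context, the _bpf_setsockopt() hunk above is reached from BPF
sock_ops programs setting the initial window. A minimal sketch of such
a caller (the program and its values are illustrative; TCP_BPF_IW comes
from linux/bpf.h, and SOL_TCP is defined by hand as is usual in BPF
programs):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	#define SOL_TCP 6	/* not provided by linux/bpf.h */

	SEC("sockops")
	int set_iw(struct bpf_sock_ops *skops)
	{
		int iw = 20;	/* illustrative initial window, in packets */

		/* TCP_BPF_IW is only accepted before any data is sent,
		 * matching the data_segs_out > syn_data test above.
		 */
		if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
			bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW,
				       &iw, sizeof(iw));
		return 1;
	}

	char _license[] SEC("license") = "GPL";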
net/ipv4/tcp.c  +4 −4
@@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk)
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
-	tp->snd_cwnd = TCP_INIT_CWND;
+	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);

	/* There's a bubble in the pipe until at least the first ACK. */
	tp->app_limited = ~0U;
@@ -3033,7 +3033,7 @@ int tcp_disconnect(struct sock *sk, int flags)
	icsk->icsk_rto_min = TCP_RTO_MIN;
	icsk->icsk_delack_max = TCP_DELACK_MAX;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-	tp->snd_cwnd = TCP_INIT_CWND;
+	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
	tp->snd_cwnd_cnt = 0;
	tp->window_clamp = 0;
	tp->delivered = 0;
@@ -3744,7 +3744,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
	info->tcpi_max_pacing_rate = rate64;

	info->tcpi_reordering = tp->reordering;
-	info->tcpi_snd_cwnd = tp->snd_cwnd;
+	info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);

	if (info->tcpi_state == TCP_LISTEN) {
		/* listeners aliased fields :
@@ -3915,7 +3915,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
	rate64 = tcp_compute_delivery_rate(tp);
	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);

-	nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+	nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));

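The tcp_get_info() change above is what userspace observes through the
TCP_INFO socket option. A small sketch of reading the reported cwnd
(error handling trimmed; not part of the commit):

	#include <stdio.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	static void print_cwnd(int fd)
	{
		struct tcp_info info;
		socklen_t len = sizeof(info);

		/* tcpi_snd_cwnd is filled from tcp_snd_cwnd(tp) above. */
		if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
			printf("snd_cwnd: %u packets\n", info.tcpi_snd_cwnd);
	}
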
net/ipv4/tcp_bbr.c  +10 −10
@@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
	} else {			 /* no RTT sample yet */
		rtt_us = USEC_PER_MSEC;	 /* use nominal default RTT */
	}
-	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
	do_div(bw, rtt_us);
	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
@@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk)
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
-		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
+		bbr->prior_cwnd = tcp_snd_cwnd(tp);  /* this cwnd is good enough */
	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
-		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+		bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}

static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
-	u32 cwnd = tp->snd_cwnd;
+	u32 cwnd = tcp_snd_cwnd(tp);

	/* An ACK for P pkts should release at most 2*P packets. We do this
	 * in two steps. First, here we deduct the number of lost packets.
@@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
-	u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+	u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;

	if (!acked)
		goto done;  /* no packet fully ACKed; just apply caps */
@@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
	cwnd = max(cwnd, bbr_cwnd_min_target);

done:
-	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
+	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));	/* apply global cap */
	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
-		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+		tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
}

/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
	bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
				     bbr->ack_epoch_acked + rs->acked_sacked);
	extra_acked = bbr->ack_epoch_acked - expected_acked;
-	extra_acked = min(extra_acked, tp->snd_cwnd);
+	extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
	if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
}
@@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
		return;

	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
-	tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
	bbr_reset_mode(sk);
}

@@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
	bbr->full_bw_cnt = 0;
	bbr_reset_lt_bw_sampling(sk);
-	return tcp_sk(sk)->snd_cwnd;
+	return tcp_snd_cwnd(tcp_sk(sk));
}

/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
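
The BBR conversions above are deliberately mechanical: direct stores
such as tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target) become
tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)), so
every path that can shrink cwnd, including the PROBE_RTT clamp, now
funnels through the WARN_ON_ONCE() in the setter.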