Commit 53c33a16 authored by David S. Miller
Browse files

Merge branch 'tcp-drop-reason-additions'



Eric Dumazet says:

====================
tcp: drop reason additions

Currently, TCP is either missing drop reasons,
or pretending that some useful packets are dropped.

This patch series makes "perf record -a -e skb:kfree_skb"
much more usable.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7925c2d9 8fbf1957
Loading
Loading
Loading
Loading
+13 −0 — enum skb_drop_reason additions (presumably include/linux/skbuff.h at this kernel version — confirm against the tree)
@@ -381,6 +381,19 @@ enum skb_drop_reason {
					 * the ofo queue, corresponding to
					 * LINUX_MIB_TCPOFOMERGE
					 */
	SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to
					   * LINUX_MIB_PAWSESTABREJECTED
					   */
	SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */
	SKB_DROP_REASON_TCP_RESET,	/* Invalid RST packet */
	SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */
	SKB_DROP_REASON_TCP_CLOSE,	/* TCP socket in CLOSE state */
	SKB_DROP_REASON_TCP_FASTOPEN,	/* dropped by FASTOPEN request socket */
	SKB_DROP_REASON_TCP_OLD_ACK,	/* TCP ACK is old, but in window */
	SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */
	SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */
	SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */
	SKB_DROP_REASON_TCP_OFO_DROP,	/* data already in receive queue */
	SKB_DROP_REASON_IP_OUTNOROUTES,	/* route lookup failed */
	SKB_DROP_REASON_BPF_CGROUP_EGRESS,	/* dropped by
						 * BPF_PROG_TYPE_CGROUP_SKB
+14 −0 — drop-reason trace strings (presumably include/trace/events/skb.h — confirm against the tree)
@@ -37,6 +37,20 @@
	EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA)		\
	EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW)	\
	EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE)		\
	EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP)		\
	EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS)	\
	EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE,		\
	   TCP_INVALID_SEQUENCE)				\
	EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET)		\
	EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN)	\
	EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE)		\
	EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN)		\
	EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK)		\
	EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK)	\
	EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA,			\
	   TCP_ACK_UNSENT_DATA)					\
	EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE,			\
	  TCP_OFO_QUEUE_PRUNE)					\
	EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES)	\
	EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS,			\
	   BPF_CGROUP_EGRESS)					\
+73 −54 — net/ipv4/tcp_input.c (tcp_ack, tcp_validate_incoming, tcp_rcv_synsent_state_process, tcp_rcv_state_process)
@@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
		if (before(ack, prior_snd_una - tp->max_window)) {
			if (!(flag & FLAG_NO_CHALLENGE_ACK))
				tcp_send_challenge_ack(sk);
			return -1;
			return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
		}
		goto old_ack;
	}
@@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
	 * this segment (RFC793 Section 3.9).
	 */
	if (after(ack, tp->snd_nxt))
		return -1;
		return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;

	if (after(ack, prior_snd_una)) {
		flag |= FLAG_SND_UNA_ADVANCED;
@@ -4674,7 +4674,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
{
	bool res = tcp_try_coalesce(sk, to, from, fragstolen);

	/* In case tcp_drop() is called later, update to->gso_segs */
	/* In case tcp_drop_reason() is called later, update to->gso_segs */
	if (res) {
		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
@@ -4691,11 +4691,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
	kfree_skb_reason(skb, reason);
}

/* Legacy wrapper: free @skb without recording a specific drop reason.
 * Shown here as deleted context — this series removes it (the -4691,11
 * hunk shrinks to 6 lines) now that every call site passes an explicit
 * SKB_DROP_REASON_* to tcp_drop_reason() directly.
 */
static void tcp_drop(struct sock *sk, struct sk_buff *skb)
{
	tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
@@ -4723,7 +4718,7 @@ static void tcp_ofo_queue(struct sock *sk)
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);

		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
			tcp_drop(sk, skb);
			tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
			continue;
		}

@@ -5334,7 +5329,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
		prev = rb_prev(node);
		rb_erase(node, &tp->out_of_order_queue);
		goal -= rb_to_skb(node)->truesize;
		tcp_drop(sk, rb_to_skb(node));
		tcp_drop_reason(sk, rb_to_skb(node),
				SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
		if (!prev || goal <= 0) {
			sk_mem_reclaim(sk);
			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5667,7 +5663,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
				  const struct tcphdr *th, int syn_inerr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool rst_seq_match = false;
	SKB_DR(reason);

	/* RFC1323: H1. Apply PAWS check first. */
	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
@@ -5679,6 +5675,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
						  LINUX_MIB_TCPACKSKIPPEDPAWS,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
			SKB_DR_SET(reason, TCP_RFC7323_PAWS);
			goto discard;
		}
		/* Reset is accepted even if it did not pass PAWS. */
@@ -5700,8 +5697,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
		} else if (tcp_reset_check(sk, skb)) {
			tcp_reset(sk, skb);
			goto reset;
		}
		SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
		goto discard;
	}

@@ -5717,9 +5715,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
		 *     Send a challenge ACK
		 */
		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
		    tcp_reset_check(sk, skb)) {
			rst_seq_match = true;
		} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
		    tcp_reset_check(sk, skb))
			goto reset;

		if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
			struct tcp_sack_block *sp = &tp->selective_acks[0];
			int max_sack = sp[0].end_seq;
			int this_sack;
@@ -5732,12 +5731,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
			}

			if (TCP_SKB_CB(skb)->seq == max_sack)
				rst_seq_match = true;
				goto reset;
		}

		if (rst_seq_match)
			tcp_reset(sk, skb);
		else {
		/* Disable TFO if RST is out-of-order
		 * and no data has been received
		 * for current active TFO socket
@@ -5746,7 +5742,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
		    sk->sk_state == TCP_ESTABLISHED)
			tcp_fastopen_active_disable(sk);
		tcp_send_challenge_ack(sk);
		}
		SKB_DR_SET(reason, TCP_RESET);
		goto discard;
	}

@@ -5761,6 +5757,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
		tcp_send_challenge_ack(sk);
		SKB_DR_SET(reason, TCP_INVALID_SYN);
		goto discard;
	}

@@ -5769,7 +5766,12 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
	return true;

discard:
	tcp_drop(sk, skb);
	tcp_drop_reason(sk, skb, reason);
	return false;

reset:
	tcp_reset(sk, skb);
	__kfree_skb(skb);
	return false;
}

@@ -5956,7 +5958,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
		return;

step5:
	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
	reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
	if (reason < 0)
		goto discard;

	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -6136,6 +6139,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;
	bool fastopen_fail;
	SKB_DR(reason);

	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -6178,7 +6182,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,

		if (th->rst) {
			tcp_reset(sk, skb);
			goto discard;
consume:
			__kfree_skb(skb);
			return 0;
		}

		/* rfc793:
@@ -6188,9 +6194,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
		 *    See note below!
		 *                                        --ANK(990513)
		 */
		if (!th->syn)
		if (!th->syn) {
			SKB_DR_SET(reason, TCP_FLAGS);
			goto discard_and_undo;

		}
		/* rfc793:
		 *   "If the SYN bit is on ...
		 *    are acceptable then ...
@@ -6267,13 +6274,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			tcp_drop(sk, skb);
			return 0;
		} else {
			tcp_send_ack(sk);
			goto consume;
		}
		tcp_send_ack(sk);
		return -1;
	}

@@ -6285,15 +6288,16 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */

		SKB_DR_SET(reason, TCP_RESET);
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
	    tcp_paws_reject(&tp->rx_opt, 0)) {
		SKB_DR_SET(reason, TCP_RFC7323_PAWS);
		goto discard_and_undo;

	}
	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs.
@@ -6342,7 +6346,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
		 */
		return -1;
#else
		goto discard;
		goto consume;
#endif
	}
	/* "fifth, if neither of the SYN or RST bits is set then
@@ -6352,7 +6356,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;
	tcp_drop_reason(sk, skb, reason);
	return 0;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
@@ -6407,21 +6412,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
	struct request_sock *req;
	int queued = 0;
	bool acceptable;
	SKB_DR(reason);

	switch (sk->sk_state) {
	case TCP_CLOSE:
		SKB_DR_SET(reason, TCP_CLOSE);
		goto discard;

	case TCP_LISTEN:
		if (th->ack)
			return 1;

		if (th->rst)
		if (th->rst) {
			SKB_DR_SET(reason, TCP_RESET);
			goto discard;

		}
		if (th->syn) {
			if (th->fin)
			if (th->fin) {
				SKB_DR_SET(reason, TCP_FLAGS);
				goto discard;
			}
			/* It is possible that we process SYN packets from backlog,
			 * so we need to make sure to disable BH and RCU right there.
			 */
@@ -6436,6 +6446,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
			consume_skb(skb);
			return 0;
		}
		SKB_DR_SET(reason, TCP_FLAGS);
		goto discard;

	case TCP_SYN_SENT:
@@ -6462,13 +6473,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
		    sk->sk_state != TCP_FIN_WAIT1);

		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
			SKB_DR_SET(reason, TCP_FASTOPEN);
			goto discard;
		}
	}

	if (!th->ack && !th->rst && !th->syn)
	if (!th->ack && !th->rst && !th->syn) {
		SKB_DR_SET(reason, TCP_FLAGS);
		goto discard;

	}
	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

@@ -6481,6 +6495,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
		if (sk->sk_state == TCP_SYN_RECV)
			return 1;	/* send one RST */
		tcp_send_challenge_ack(sk);
		SKB_DR_SET(reason, TCP_OLD_ACK);
		goto discard;
	}
	switch (sk->sk_state) {
@@ -6574,7 +6589,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
			inet_csk_reset_keepalive_timer(sk, tmo);
		} else {
			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
			goto discard;
			goto consume;
		}
		break;
	}
@@ -6582,7 +6597,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
	case TCP_CLOSING:
		if (tp->snd_una == tp->write_seq) {
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			goto discard;
			goto consume;
		}
		break;

@@ -6590,7 +6605,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
		if (tp->snd_una == tp->write_seq) {
			tcp_update_metrics(sk);
			tcp_done(sk);
			goto discard;
			goto consume;
		}
		break;
	}
@@ -6641,9 +6656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)

	if (!queued) {
discard:
		tcp_drop(sk, skb);
		tcp_drop_reason(sk, skb, reason);
	}
	return 0;

consume:
	__kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);