Commit adfb62db authored by David S. Miller

Merge branch 'tcp_drop_reason'



Menglong Dong says:

====================
net: add skb drop reasons to TCP packet receive

In commit c504e5c2 ("net: skb: introduce kfree_skb_reason()"), we added
support for reporting the reason for an skb drop to the kfree_skb
tracepoint. This series adds skb drop reasons to the TCP layer (both
TCPv4 and TCPv6 are covered).
The following functions are handled (a sketch of the common pattern
follows the list):

tcp_v4_rcv()
tcp_v6_rcv()
tcp_v4_inbound_md5_hash()
tcp_v6_inbound_md5_hash()
tcp_add_backlog()
tcp_v4_do_rcv()
tcp_v6_do_rcv()
tcp_rcv_established()
tcp_data_queue()
tcp_data_queue_ofo()
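
Each conversion follows the same shape: a local skb_drop_reason starts
as SKB_DROP_REASON_NOT_SPECIFIED, is refined at the individual drop
sites, and is passed to kfree_skb_reason() (directly, or through the
new tcp_drop_reason() helper) where the skb is finally freed. Below is
a condensed sketch of that pattern; some_tcp_rcv() is a hypothetical
stand-in, not an actual kernel function:

/* Hypothetical sketch of the conversion pattern, not upstream code:
 * refine 'reason' at each drop site, report it where the skb is freed.
 */
static void some_tcp_rcv(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;

	if (tcp_checksum_complete(skb)) {
		reason = SKB_DROP_REASON_TCP_CSUM;
		goto drop;
	}

	if (!tcp_receive_window(tcp_sk(sk))) {
		reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
		goto drop;
	}

	/* ... normal receive processing ... */
	return;

drop:
	tcp_drop_reason(sk, skb, reason);	/* was tcp_drop(sk, skb) */
}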

The functions handled here are mostly on the packet ingress path, as
skb drops rarely happen on the TCP egress path. TCP state processing
is more complicated, though: it is hard to report the drop reason at
the place where the skb is actually freed. For example, when an skb is
dropped in tcp_rcv_state_process(), the drop may be caused by the call
to tcp_v4_conn_request(), and there is no easy way to return a drop
reason from tcp_v4_conn_request(). Such cases are therefore skipped
for now.
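
To illustrate the difficulty, here is a heavily simplified, hypothetical
sketch of the listener handling (not the exact upstream code): only an
int comes back from the conn_request() callback, so a reason chosen
inside tcp_v4_conn_request() is already lost by the time the skb is
freed:

/* Hypothetical simplification of the TCP_LISTEN handling in
 * tcp_rcv_state_process(); not the exact upstream code.
 */
static int listen_state_process(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);

	if (th->syn) {
		/* tcp_v4_conn_request() may drop the SYN internally for
		 * several distinct reasons, but it only reports success
		 * or failure, so no drop reason reaches this caller.
		 */
		if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0)
			return 1;
		consume_skb(skb);
		return 0;
	}

	return 1;	/* not a SYN: discarded without a precise reason */
}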

The following new drop reasons are introduced (their meanings are
described in their documentation):

/* SKB_DROP_REASON_TCP_MD5* corresponding to LINUX_MIB_TCPMD5* */
SKB_DROP_REASON_TCP_MD5NOTFOUND
SKB_DROP_REASON_TCP_MD5UNEXPECTED
SKB_DROP_REASON_TCP_MD5FAILURE
SKB_DROP_REASON_SOCKET_BACKLOG
SKB_DROP_REASON_TCP_FLAGS
SKB_DROP_REASON_TCP_ZEROWINDOW
SKB_DROP_REASON_TCP_OLD_DATA
SKB_DROP_REASON_TCP_OVERWINDOW
/* corresponding to LINUX_MIB_TCPOFOMERGE */
SKB_DROP_REASON_TCP_OFOMERGE

Here is an example of getting TCP packet drop reasons from ftrace:

$ echo 1 > /sys/kernel/debug/tracing/events/skb/kfree_skb/enable
$ cat /sys/kernel/debug/tracing/trace
<idle>-0       [036] ..s1.   647.428165: kfree_skb: skbaddr=000000004d037db6 protocol=2048 location=0000000074cd1243 reason: NO_SOCKET
<idle>-0       [020] ..s2.   639.676674: kfree_skb: skbaddr=00000000bcbfa42d protocol=2048 location=00000000bfe89d35 reason: PROTO_MEM

The reason 'PROTO_MEM' tells us that the skb was dropped because the
memory accounted against net.ipv4.tcp_mem has reached its limit.

Changes since v2:
- remove the 'inline' from tcp_drop() in the 1st patch, as Jakub
  suggested

Changes since v1:
- enrich the documentation for this series in the cover letter,
  as Eric suggested
- fix the compile warning reported by Jakub in the 6th patch
- let NO_SOCKET trump the XFRM failure in the 2nd and 3rd patches
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 48c77bdf d25e481b
+34 −0
@@ -346,6 +346,40 @@ enum skb_drop_reason {
					 * udp packet drop out of
					 * udp_memory_allocated.
					 */
	SKB_DROP_REASON_TCP_MD5NOTFOUND,	/* no MD5 hash and one
						 * expected, corresponding
						 * to LINUX_MIB_TCPMD5NOTFOUND
						 */
	SKB_DROP_REASON_TCP_MD5UNEXPECTED,	/* MD5 hash and we're not
						 * expecting one, corresponding
						 * to LINUX_MIB_TCPMD5UNEXPECTED
						 */
	SKB_DROP_REASON_TCP_MD5FAILURE,	/* MD5 hash and it's wrong,
					 * corresponding to
					 * LINUX_MIB_TCPMD5FAILURE
					 */
	SKB_DROP_REASON_SOCKET_BACKLOG,	/* failed to add skb to socket
					 * backlog (see
					 * LINUX_MIB_TCPBACKLOGDROP)
					 */
	SKB_DROP_REASON_TCP_FLAGS,	/* TCP flags invalid */
	SKB_DROP_REASON_TCP_ZEROWINDOW,	/* TCP receive window size is zero,
					 * see LINUX_MIB_TCPZEROWINDOWDROP
					 */
	SKB_DROP_REASON_TCP_OLD_DATA,	/* the TCP data received has already
					 * been received before (a spurious
					 * retransmission may have happened),
					 * see LINUX_MIB_DELAYEDACKLOST
					 */
	SKB_DROP_REASON_TCP_OVERWINDOW,	/* the TCP data is out of window,
					 * the seq of the first byte exceeds
					 * the right edge of the receive
					 * window
					 */
	SKB_DROP_REASON_TCP_OFOMERGE,	/* the data of skb is already in
					 * the ofo queue, corresponding to
					 * LINUX_MIB_TCPOFOMERGE
					 */
	SKB_DROP_REASON_MAX,
};

+2 −1
@@ -1367,7 +1367,8 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
		__skb_checksum_complete(skb);
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
		     enum skb_drop_reason *reason);

#ifdef CONFIG_INET
void __sk_defer_free_flush(struct sock *sk);
+10 −0
@@ -27,6 +27,16 @@
	EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO)		\
	EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF)	\
	EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM)		\
	EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND)	\
	EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED,			\
	   TCP_MD5UNEXPECTED)					\
	EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE)	\
	EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG)	\
	EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS)		\
	EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW)	\
	EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA)		\
	EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW)	\
	EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE)		\
	EMe(SKB_DROP_REASON_MAX, MAX)

#undef EM
+32 −10
@@ -4684,10 +4684,16 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
	return res;
}

static void tcp_drop(struct sock *sk, struct sk_buff *skb)
static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
			    enum skb_drop_reason reason)
{
	sk_drops_add(sk, skb);
	__kfree_skb(skb);
	kfree_skb_reason(skb, reason);
}

static void tcp_drop(struct sock *sk, struct sk_buff *skb)
{
	tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

/* This one checks to see if we can put data from the
@@ -4773,7 +4779,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
		sk->sk_data_ready(sk);
		tcp_drop(sk, skb);
		tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
		return;
	}

@@ -4836,7 +4842,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
				/* All the bits are present. Drop. */
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPOFOMERGE);
				tcp_drop(sk, skb);
				tcp_drop_reason(sk, skb,
						SKB_DROP_REASON_TCP_OFOMERGE);
				skb = NULL;
				tcp_dsack_set(sk, seq, end_seq);
				goto add_sack;
@@ -4855,7 +4862,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
						 TCP_SKB_CB(skb1)->end_seq);
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPOFOMERGE);
				tcp_drop(sk, skb1);
				tcp_drop_reason(sk, skb1,
						SKB_DROP_REASON_TCP_OFOMERGE);
				goto merge_right;
			}
		} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4883,7 +4891,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
				 TCP_SKB_CB(skb1)->end_seq);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
		tcp_drop(sk, skb1);
		tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
	}
	/* If there is no skb after us, we are the last_skb ! */
	if (!skb1)
@@ -4982,6 +4990,7 @@ void tcp_data_ready(struct sock *sk)
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	enum skb_drop_reason reason;
	bool fragstolen;
	int eaten;

@@ -5000,6 +5009,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
	skb_dst_drop(skb);
	__skb_pull(skb, tcp_hdr(skb)->doff * 4);

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	tp->rx_opt.dsack = 0;

	/*  Queue data for delivery to the user.
@@ -5008,6 +5018,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
	 */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0) {
			reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
			goto out_of_window;
		}
@@ -5017,6 +5028,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
		if (skb_queue_len(&sk->sk_receive_queue) == 0)
			sk_forced_mem_schedule(sk, skb->truesize);
		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
			reason = SKB_DROP_REASON_PROTO_MEM;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
			sk->sk_data_ready(sk);
			goto drop;
@@ -5053,6 +5065,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		tcp_rcv_spurious_retrans(sk, skb);
		/* A retransmit, 2nd most common case.  Force an immediate ack. */
		reason = SKB_DROP_REASON_TCP_OLD_DATA;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

@@ -5060,13 +5073,16 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
		inet_csk_schedule_ack(sk);
drop:
		tcp_drop(sk, skb);
		tcp_drop_reason(sk, skb, reason);
		return;
	}

	/* Out of window. F.e. zero window probe. */
	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
	if (!before(TCP_SKB_CB(skb)->seq,
		    tp->rcv_nxt + tcp_receive_window(tp))) {
		reason = SKB_DROP_REASON_TCP_OVERWINDOW;
		goto out_of_window;
	}

	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		/* Partial packet, seq < rcv_next < end_seq */
@@ -5076,6 +5092,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
		 * remembering D-SACK for its head made in previous line.
		 */
		if (!tcp_receive_window(tp)) {
			reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
			goto out_of_window;
		}
@@ -5781,6 +5798,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 */
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	const struct tcphdr *th = (const struct tcphdr *)skb->data;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int len = skb->len;
@@ -5869,6 +5887,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
				return;
			} else { /* Header too small */
				reason = SKB_DROP_REASON_PKT_TOO_SMALL;
				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
				goto discard;
			}
@@ -5924,8 +5943,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_error;

	if (!th->ack && !th->rst && !th->syn)
	if (!th->ack && !th->rst && !th->syn) {
		reason = SKB_DROP_REASON_TCP_FLAGS;
		goto discard;
	}

	/*
	 *	Standard slow path.
@@ -5951,12 +5972,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
	return;

csum_error:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);

discard:
	tcp_drop(sk, skb);
	tcp_drop_reason(sk, skb, reason);
}
EXPORT_SYMBOL(tcp_rcv_established);

+24 −8
@@ -1412,7 +1412,8 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif)
				    int dif, int sdif,
				    enum skb_drop_reason *reason)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
@@ -1445,11 +1446,13 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
		return false;

	if (hash_expected && !hash_location) {
		*reason = SKB_DROP_REASON_TCP_MD5NOTFOUND;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		*reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}
@@ -1462,6 +1465,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		*reason = SKB_DROP_REASON_TCP_MD5FAILURE;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
				     &iph->saddr, ntohs(th->source),
@@ -1704,6 +1708,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason;
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1726,6 +1731,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
		return 0;
	}

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (tcp_checksum_complete(skb))
		goto csum_err;

@@ -1753,7 +1759,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	kfree_skb_reason(skb, reason);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
@@ -1762,6 +1768,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
	return 0;

csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -1807,7 +1814,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
	return 0;
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
		     enum skb_drop_reason *reason)
{
	u32 limit, tail_gso_size, tail_gso_segs;
	struct skb_shared_info *shinfo;
@@ -1833,6 +1841,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
	if (unlikely(tcp_checksum_complete(skb))) {
		bh_unlock_sock(sk);
		trace_tcp_bad_csum(skb);
		*reason = SKB_DROP_REASON_TCP_CSUM;
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		return true;
@@ -1921,6 +1930,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
@@ -1971,13 +1981,13 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	enum skb_drop_reason drop_reason;
	int sdif = inet_sdif(skb);
	int dif = inet_iif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int drop_reason;
	int ret;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -2025,7 +2035,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif,
						     &drop_reason))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
@@ -2057,6 +2068,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
@@ -2092,10 +2105,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
		}
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
	if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif, &drop_reason))
		goto discard_and_relse;

	nf_reset_ct(skb);
@@ -2124,7 +2139,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v4_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
@@ -2166,6 +2181,7 @@ int tcp_v4_rcv(struct sk_buff *skb)

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}