Commit 8cb79af5 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'skb-gro-optimize'



Paolo Abeni says:

====================
sk_buff: optimize GRO for the common case

This is a trimmed down revision of "sk_buff: optimize layout for GRO",
specifically dropping the changes to the sk_buff layout[1].

This series tries to accomplish 2 goals:
- optimize the GRO stage for the most common scenario, avoiding a bunch
  of conditional and some more code
- let owned skbs enter the GRO engine, allowing backpressure in the
  veth GRO forward path.

A new sk_buff flag (!!!) is introduced and maintained for GRO's sake.
Such field uses an existing hole, so there is no change to the sk_buff
size.

[1] two main reasons:
- moving the skb->inner_* fields requires some extra care, as some
  in-kernel users access those fields regardless of skb->encapsulation.
- extending the secmark size clashes with the ct and nft uAPIs

Addressing all of the above is possible, I think, but certainly not in a
single series.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 883d71a5 d504fff0
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -713,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
	int mac_len, delta, off;
	struct xdp_buff xdp;

	skb_orphan_partial(skb);
	skb_prepare_for_gro(skb);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
+6 −0
Original line number Diff line number Diff line
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
 *		CHECKSUM_UNNECESSARY (max 3)
 *	@dst_pending_confirm: need to confirm neighbour
 *	@decrypted: Decrypted SKB
 *	@slow_gro: state present at GRO time, slower prepare step required
 *	@napi_id: id of the NAPI struct this skb came from
 *	@sender_cpu: (aka @napi_id) source CPU in XPS
 *	@secmark: security marking
@@ -870,6 +871,7 @@ struct sk_buff {
#ifdef CONFIG_TLS_DEVICE
	__u8			decrypted:1;
#endif
	__u8			slow_gro:1;

#ifdef CONFIG_NET_SCHED
	__u16			tc_index;	/* traffic control index */
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 */
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
	/* A non-NULL dst means GRO needs the slower prepare step;
	 * |= preserves a slow_gro flag already set by other state
	 * (e.g. nfct via skb_set_nfct).
	 */
	skb->slow_gro |= !!dst;
	skb->_skb_refdst = (unsigned long)dst;
}

@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
/* Set a dst on @skb without taking a reference (SKB_DST_NOREF); the
 * caller must be inside an RCU read-side section, as enforced by the
 * WARN_ON below.
 */
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	/* |= (not =), matching skb_dst_set()/skb_set_nfct(): a plain
	 * assignment with a NULL dst would clear a slow_gro flag set by
	 * other state (nfct, socket ownership), making GRO skip the
	 * required slow prepare step.
	 */
	skb->slow_gro |= !!dst;
	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}

@@ -4216,6 +4220,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
/* Store the conntrack association @nfct on @skb; compiles to a no-op
 * when CONFIG_NF_CONNTRACK is disabled.
 */
static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	/* Conntrack state must survive GRO: flag the skb so the GRO
	 * engine takes the slower, state-aware prepare path.
	 */
	skb->slow_gro |= !!nfct;
	skb->_nfct = nfct;
#endif
}
@@ -4375,6 +4380,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb_nfct(dst));
#endif
	dst->slow_gro = src->slow_gro;
	__nf_copy(dst, src, true);
}

+2 −0
Original line number Diff line number Diff line
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)

static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
{
	nskb->slow_gro |= !!refdst;
	nskb->_skb_refdst = refdst;
	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
		dst_clone(skb_dst(nskb));
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
			dst = NULL;

		skb->_skb_refdst = (unsigned long)dst;
		skb->slow_gro |= !!dst;
	}

	return skb->_skb_refdst != 0UL;
+9 −0
Original line number Diff line number Diff line
@@ -2249,6 +2249,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
	return false;
}

/* Prepare @skb to enter the GRO engine.
 *
 * skbs owned via sock_wfree keep their owning socket (allowing
 * backpressure, e.g. in the veth GRO forward path) and are flagged
 * slow_gro so GRO runs the slower, ownership-aware prepare step; any
 * other destructor forces a plain skb_orphan().
 */
static inline void skb_prepare_for_gro(struct sk_buff *skb)
{
	if (skb->destructor != sock_wfree) {
		skb_orphan(skb);
		return;
	}
	skb->slow_gro = 1;
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires);

+24 −8
Original line number Diff line number Diff line
@@ -6014,7 +6014,6 @@ static void gro_list_prepare(const struct list_head *head,
		diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
		if (skb_vlan_tag_present(p))
			diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
		diffs |= skb_metadata_dst_cmp(p, skb);
		diffs |= skb_metadata_differs(p, skb);
		if (maclen == ETH_HLEN)
			diffs |= compare_ether_header(skb_mac_header(p),
@@ -6024,17 +6023,30 @@ static void gro_list_prepare(const struct list_head *head,
				       skb_mac_header(skb),
				       maclen);

		/* in most common scenarions _state is 0
		 * otherwise we are already on some slower paths
		 * either skip all the infrequent tests altogether or
		 * avoid trying too hard to skip each of them individually
		 */
		if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
			struct tc_skb_ext *skb_ext;
			struct tc_skb_ext *p_ext;
#endif

			diffs |= p->sk != skb->sk;
			diffs |= skb_metadata_dst_cmp(p, skb);
			diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);

#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		if (!diffs) {
			struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
			struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
			skb_ext = skb_ext_find(skb, TC_SKB_EXT);
			p_ext = skb_ext_find(p, TC_SKB_EXT);

			diffs |= (!!p_ext) ^ (!!skb_ext);
			if (!diffs && unlikely(skb_ext))
				diffs |= p_ext->chain ^ skb_ext->chain;
		}
#endif
		}

		NAPI_GRO_CB(p)->same_flow = !diffs;
	}
@@ -6299,8 +6311,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
	skb->encapsulation = 0;
	skb_shinfo(skb)->gso_type = 0;
	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
	if (unlikely(skb->slow_gro)) {
		skb_orphan(skb);
		skb_ext_reset(skb);
		nf_reset_ct(skb);
		skb->slow_gro = 0;
	}

	napi->skb = skb;
}
Loading