Commit 3f79eb3c authored by David S. Miller
Browse files

Merge branch 'gro-micro-optimize-dev_gro_receive'

Alexander Lobakin says:

====================
gro: micro-optimize dev_gro_receive()

This random series addresses some of the suboptimal constructions used
in the main GRO entry point.
The main body is the gro_list_prepare() simplification and the pointer
usage optimization in dev_gro_receive() itself. Although mostly cosmetic,
it gives about +10 Mbps on my setup for both TCP and UDP (both single- and
multi-flow).

Since v1 [0]:
 - drop the replacement of bucket index calculation with
   reciprocal_scale() since it makes absolutely no sense (Eric);
 - improve stack usage in dev_gro_receive() (Eric);
 - reverse the order of patches to avoid changes superseding.

[0] https://lore.kernel.org/netdev/20210312162127.239795-1-alobakin@pm.me


====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents f4e6d7cd d0eed5c3
Loading
Loading
Loading
Loading
+18 −22
Original line number Diff line number Diff line
@@ -5858,15 +5858,13 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
}
EXPORT_SYMBOL(napi_gro_flush);

static struct list_head *gro_list_prepare(struct napi_struct *napi,
					  struct sk_buff *skb)
static void gro_list_prepare(const struct list_head *head,
			     const struct sk_buff *skb)
{
	unsigned int maclen = skb->dev->hard_header_len;
	u32 hash = skb_get_hash_raw(skb);
	struct list_head *head;
	struct sk_buff *p;

	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
	list_for_each_entry(p, head, list) {
		unsigned long diffs;

@@ -5892,8 +5890,6 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
				       maclen);
		NAPI_GRO_CB(p)->same_flow = !diffs;
	}

	return head;
}

static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -5956,11 +5952,11 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)

static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
	struct gro_list *gro_list = &napi->gro_hash[bucket];
	struct list_head *head = &offload_base;
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	struct list_head *gro_head;
	struct sk_buff *pp = NULL;
	enum gro_result ret;
	int same_flow;
@@ -5969,7 +5965,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
	if (netif_elide_gro(skb->dev))
		goto normal;

	gro_head = gro_list_prepare(napi, skb);
	gro_list_prepare(&gro_list->list, skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
@@ -6005,7 +6001,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff

		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
					ipv6_gro_receive, inet_gro_receive,
					gro_head, skb);
					&gro_list->list, skb);
		break;
	}
	rcu_read_unlock();
@@ -6024,7 +6020,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
	if (pp) {
		skb_list_del_init(pp);
		napi_gro_complete(napi, pp);
		napi->gro_hash[hash].count--;
		gro_list->count--;
	}

	if (same_flow)
@@ -6033,16 +6029,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
	if (NAPI_GRO_CB(skb)->flush)
		goto normal;

	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
		gro_flush_oldest(napi, gro_head);
	} else {
		napi->gro_hash[hash].count++;
	}
	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
		gro_flush_oldest(napi, &gro_list->list);
	else
		gro_list->count++;

	NAPI_GRO_CB(skb)->count = 1;
	NAPI_GRO_CB(skb)->age = jiffies;
	NAPI_GRO_CB(skb)->last = skb;
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	list_add(&skb->list, gro_head);
	list_add(&skb->list, &gro_list->list);
	ret = GRO_HELD;

pull:
@@ -6050,11 +6046,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
	if (grow > 0)
		gro_pull_from_frag0(skb, grow);
ok:
	if (napi->gro_hash[hash].count) {
		if (!test_bit(hash, &napi->gro_bitmask))
			__set_bit(hash, &napi->gro_bitmask);
	} else if (test_bit(hash, &napi->gro_bitmask)) {
		__clear_bit(hash, &napi->gro_bitmask);
	if (gro_list->count) {
		if (!test_bit(bucket, &napi->gro_bitmask))
			__set_bit(bucket, &napi->gro_bitmask);
	} else if (test_bit(bucket, &napi->gro_bitmask)) {
		__clear_bit(bucket, &napi->gro_bitmask);
	}

	return ret;