Commit 48c2afc1 authored by Eric Dumazet's avatar Eric Dumazet Committed by Greg Kroah-Hartman
Browse files

inet: frags: get rid of ipfrag_skb_cb/FRAG_CB



ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately
this integer is currently in a different cache line than skb->next,
meaning that we use two cache lines per skb when finding the insertion point.

By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields
in a single cache line and save precious memory bandwidth.

Note that after the fast path added by Changli Gao in commit
d6bebca9 ("fragment: add fast path for in-order fragments")
this change wont help the fast path, since we still need
to access prev->len (2nd cache line), but will show great
benefits when slow path is entered, since we perform
a linear scan of a potentially long list.

Also, note that this potential long list is an attack vector,
we might consider also using an rb-tree there eventually.

Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
(cherry picked from commit bf663371)
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 8291cd94
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -678,6 +678,7 @@ struct sk_buff {
		 * UDP receive path is one user.
		 */
		unsigned long		dev_scratch;
		int			ip_defrag_offset;
	};
	/*
	 * This is the control buffer. It is free to use for every
+14 −21
Original line number Diff line number Diff line
@@ -57,14 +57,6 @@
 */
static const char ip_frag_cache_name[] = "ip4-frags";

struct ipfrag_skb_cb
{
	struct inet_skb_parm	h;
	int			offset;
};

#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
	struct inet_frag_queue q;
@@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
	 * this fragment, right?
	 */
	prev = qp->q.fragments_tail;
	if (!prev || FRAG_CB(prev)->offset < offset) {
	if (!prev || prev->ip_defrag_offset < offset) {
		next = NULL;
		goto found;
	}
	prev = NULL;
	for (next = qp->q.fragments; next != NULL; next = next->next) {
		if (FRAG_CB(next)->offset >= offset)
		if (next->ip_defrag_offset >= offset)
			break;	/* bingo! */
		prev = next;
	}
@@ -370,7 +362,7 @@ found:
	 * any overlaps are eliminated.
	 */
	if (prev) {
		int i = (FRAG_CB(prev)->offset + prev->len) - offset;
		int i = (prev->ip_defrag_offset + prev->len) - offset;

		if (i > 0) {
			offset += i;
@@ -387,8 +379,8 @@ found:

	err = -ENOMEM;

	while (next && FRAG_CB(next)->offset < end) {
		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
	while (next && next->ip_defrag_offset < end) {
		int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */

		if (i < next->len) {
			int delta = -next->truesize;
@@ -401,7 +393,7 @@ found:
			delta += next->truesize;
			if (delta)
				add_frag_mem_limit(qp->q.net, delta);
			FRAG_CB(next)->offset += i;
			next->ip_defrag_offset += i;
			qp->q.meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
@@ -425,7 +417,13 @@ found:
		}
	}

	FRAG_CB(skb)->offset = offset;
	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
	dev = skb->dev;
	if (dev)
		qp->iif = dev->ifindex;
	/* Makes sure compiler wont do silly aliasing games */
	barrier();
	skb->ip_defrag_offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
@@ -436,11 +434,6 @@ found:
	else
		qp->q.fragments = skb;

	dev = skb->dev;
	if (dev) {
		qp->iif = dev->ifindex;
		skb->dev = NULL;
	}
	qp->q.stamp = skb->tstamp;
	qp->q.meat += skb->len;
	qp->ecn |= ecn;
@@ -516,7 +509,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
	}

	WARN_ON(!head);
	WARN_ON(FRAG_CB(head)->offset != 0);
	WARN_ON(head->ip_defrag_offset != 0);

	/* Allocate a new buffer for the datagram. */
	ihlen = ip_hdrlen(head);