Commit 5584d9e6 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'xdp: recycle Page Pool backed skbs built from XDP frames'

Alexander Lobakin says:

====================

Yeah, I still remember that "Who needs cpumap nowadays" (c), but anyway.

__xdp_build_skb_from_frame() missed the moment when the networking stack
became able to recycle skb pages backed by a page_pool. This was making
e.g. cpumap redirect even less effective than simple %XDP_PASS. veth was
also affected in some scenarios.
A lot of drivers use skb_mark_for_recycle() already, it's been almost
two years and there seem to be no issues with using it in the generic
code too. {__,}xdp_release_frame() can then be removed, as it has lost
its last user.
Page Pool then becomes zero-alloc (or almost) in the abovementioned
cases, too. Other memory type models (who needs them at this point)
are left unchanged.

Some numbers on 1 Xeon Platinum core bombed with 27 Mpps of 64-byte
IPv6 UDP, iavf w/XDP[0] (CONFIG_PAGE_POOL_STATS is enabled):

Plain %XDP_PASS on baseline, Page Pool driver:

src cpu Rx     drops  dst cpu Rx
  2.1 Mpps       N/A    2.1 Mpps

cpumap redirect (cross-core, w/o leaving its NUMA node) on baseline:

  6.8 Mpps  5.0 Mpps    1.8 Mpps

cpumap redirect with skb PP recycling:

  7.9 Mpps  5.7 Mpps    2.2 Mpps
                       +22% (from cpumap redir on baseline)

[0] https://github.com/alobakin/linux/commits/iavf-xdp


====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 283b40c5 d4e49233
Loading
Loading
Loading
Loading
+2 −2
Original line number Original line Diff line number Diff line
@@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
#endif
#endif
}
}


#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb)
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
{
#ifdef CONFIG_PAGE_POOL
	skb->pp_recycle = 1;
	skb->pp_recycle = 1;
}
#endif
#endif
}


#endif	/* __KERNEL__ */
#endif	/* __KERNEL__ */
#endif	/* _LINUX_SKBUFF_H */
#endif	/* _LINUX_SKBUFF_H */
+0 −29
Original line number Original line Diff line number Diff line
@@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq);
			   struct xdp_frame_bulk *bq);


/* When sending xdp_frame into the network stack, then there is no
 * return point callback, which is needed to release e.g. DMA-mapping
 * resources with page_pool.  Thus, have explicit function to release
 * frame resources.
 */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
	struct xdp_mem_info *mem = &xdpf->mem;
	struct skb_shared_info *sinfo;
	int i;

	/* Curr only page_pool needs this */
	if (mem->type != MEM_TYPE_PAGE_POOL)
		return;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	for (i = 0; i < sinfo->nr_frags; i++) {
		struct page *page = skb_frag_page(&sinfo->frags[i]);

		__xdp_release_frame(page_address(page), mem);
	}
out:
	__xdp_release_frame(xdpf->data, mem);
}

static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
{
	struct skb_shared_info *sinfo;
	struct skb_shared_info *sinfo;
+2 −17
Original line number Original line Diff line number Diff line
@@ -531,21 +531,6 @@ void xdp_return_buff(struct xdp_buff *xdp)
}
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
EXPORT_SYMBOL_GPL(xdp_return_buff);


/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	rcu_read_lock();
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	page = virt_to_head_page(data);
	if (xa)
		page_pool_release_page(xa->page_pool, page);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);

void xdp_attachment_setup(struct xdp_attachment_info *info,
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
			  struct netdev_bpf *bpf)
{
{
@@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
	 * - RX ring dev queue index	(skb_record_rx_queue)
	 * - RX ring dev queue index	(skb_record_rx_queue)
	 */
	 */


	/* Until page_pool get SKB return path, release DMA here */
	if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
	xdp_release_frame(xdpf);
		skb_mark_for_recycle(skb);


	/* Allow SKB to reuse area used by xdp_frame */
	/* Allow SKB to reuse area used by xdp_frame */
	xdp_scrub_frame(xdpf);
	xdp_scrub_frame(xdpf);
+26 −10
Original line number Original line Diff line number Diff line
@@ -4,6 +4,19 @@


#define ETH_ALEN 6
#define ETH_ALEN 6
#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))

/**
 * enum frame_mark - magics to distinguish page/packet paths
 * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
 * @MARK_IN: frame is being processed by the input XDP prog.
 * @MARK_SKB: frame did hit the TC ingress hook as an skb.
 */
enum frame_mark {
	MARK_XMIT	= 0U,
	MARK_IN		= 0x42,
	MARK_SKB	= 0x45,
};

const volatile int ifindex_out;
const volatile int ifindex_out;
const volatile int ifindex_in;
const volatile int ifindex_in;
const volatile __u8 expect_dst[ETH_ALEN];
const volatile __u8 expect_dst[ETH_ALEN];
@@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp)
	if (*metadata != 0x42)
	if (*metadata != 0x42)
		return XDP_ABORTED;
		return XDP_ABORTED;


	if (*payload == 0) {
	if (*payload == MARK_XMIT)
		*payload = 0x42;
		pkts_seen_zero++;
		pkts_seen_zero++;
	}

	*payload = MARK_IN;


	if (bpf_xdp_adjust_meta(xdp, 4))
	if (bpf_xdp_adjust_meta(xdp, 4))
		return XDP_ABORTED;
		return XDP_ABORTED;
@@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
	return ret;
	return ret;
}
}


static bool check_pkt(void *data, void *data_end)
static bool check_pkt(void *data, void *data_end, const __u32 mark)
{
{
	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
	__u8 *payload = data + HDR_SZ;
	__u8 *payload = data + HDR_SZ;
@@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
	if (payload + 1 > data_end)
	if (payload + 1 > data_end)
		return false;
		return false;


	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
	if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
		return false;
		return false;


	/* reset the payload so the same packet doesn't get counted twice when
	/* reset the payload so the same packet doesn't get counted twice when
	 * it cycles back through the kernel path and out the dst veth
	 * it cycles back through the kernel path and out the dst veth
	 */
	 */
	*payload = 0;
	*payload = mark;
	return true;
	return true;
}
}


@@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
	void *data = (void *)(long)xdp->data;
	void *data = (void *)(long)xdp->data;
	void *data_end = (void *)(long)xdp->data_end;
	void *data_end = (void *)(long)xdp->data_end;


	if (check_pkt(data, data_end))
	if (check_pkt(data, data_end, MARK_XMIT))
		pkts_seen_xdp++;
		pkts_seen_xdp++;


	/* Return XDP_DROP to make sure the data page is recycled, like when it
	/* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
	 * exits a physical NIC. Recycled pages will be counted in the
	 * it exited a physical NIC. Those pages will be counted in the
	 * pkts_seen_zero counter above.
	 * pkts_seen_zero counter above.
	 */
	 */
	return XDP_DROP;
	return XDP_DROP;
@@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
	void *data = (void *)(long)skb->data;
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	void *data_end = (void *)(long)skb->data_end;


	if (check_pkt(data, data_end))
	if (check_pkt(data, data_end, MARK_SKB))
		pkts_seen_tc++;
		pkts_seen_tc++;


	/* Will be either recycled or freed, %MARK_SKB makes sure it won't
	 * hit any of the counters above.
	 */
	return 0;
	return 0;
}
}