Commit e61caf04 authored by Jakub Kicinski

Merge branch 'page_pool-allow-caching-from-safely-localized-napi'

Jakub Kicinski says:

====================
page_pool: allow caching from safely localized NAPI

I went back to the explicit "are we in the NAPI poll method" check,
mostly because I don't like having both checks around :( (even though
I maintain that in_softirq() && !in_hardirq() is just as safe, since
softirqs do not nest).

Still returning the skbs to a CPU, though, not to the NAPI instance.
I reckon we could create a small refcounted struct per NAPI instance
which would allow sockets and other users to hold a persistent
and safe reference. But that's a bigger change, and I get 90+%
recycling through the cache with just these patches (for RR and
streaming tests with 100% CPU use it's almost 100%).

Some numbers for the streaming test with 100% CPU use (from the
previous version, but they really perform the same):

                          HW-GRO                  page=page
                       before      after         before      after
recycle:
cached:                     0  138669686              0  150197505
cache_full:                 0     223391              0      74582
ring:               138551933    9997191      149299454          0
ring_full:                  0        488           3154     127590
released_refcnt:            0          0              0          0

alloc:
fast:               136491361  148615710      146969587  150322859
slow:                    1772       1799            144        105
slow_high_order:            0          0              0          0
empty:                   1772       1799            144        105
refill:               2165245     156302        2332880       2128
waive:                      0          0              0          0

v1: https://lore.kernel.org/all/20230411201800.596103-1-kuba@kernel.org/
rfcv2: https://lore.kernel.org/all/20230405232100.103392-1-kuba@kernel.org/
====================
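[Editor's note: the trade-off discussed in the cover letter can be made
concrete. Below is a hedged sketch contrasting the two context checks;
in_softirq() and in_hardirq() are the usual kernel predicates, but both
helper names are hypothetical, and only the explicit-flag form reflects
what this series actually plumbs through.]

#include <linux/preempt.h>

/* Hypothetical: infer recycling safety from preemption state. The
 * argument in the cover letter is that softirqs do not nest, so
 * softirq-and-not-hardirq means no same-CPU race with the consumer.
 */
static inline bool can_recycle_direct_implicit(void)
{
	return in_softirq() && !in_hardirq();
}

/* Hypothetical: the explicit form, where callers that know they run
 * in the NAPI poll method pass napi_safe = true down the free path.
 */
static inline bool can_recycle_direct_explicit(bool napi_safe)
{
	return napi_safe;
}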

Link: https://lore.kernel.org/r/20230413042605.895677-1-kuba@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents c11d2e71 294e39e0
Documentation/networking/page_pool.rst  +1 −0
@@ -165,6 +165,7 @@ Registration
    pp_params.pool_size = DESC_NUM;
    pp_params.nid = NUMA_NO_NODE;
    pp_params.dev = priv->dev;
+   pp_params.napi = napi; /* only if locking is tied to NAPI */
    pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
    page_pool = page_pool_create(&pp_params);
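
[Editor's note: expanding the documentation snippet into a fuller
registration sketch. Hedged: my_priv, my_create_pool and the DESC_NUM
value are illustrative names following the doc example, not driver code
from this series.]

#include <linux/err.h>
#include <linux/numa.h>
#include <net/page_pool.h>

#define DESC_NUM 256	/* assumed ring size, as in the doc example */

struct my_priv {
	struct device		*dev;
	struct napi_struct	napi;
	struct page_pool	*pool;
};

static int my_create_pool(struct my_priv *priv)
{
	struct page_pool_params pp_params = {
		.pool_size	= DESC_NUM,
		.nid		= NUMA_NO_NODE,
		.dev		= priv->dev,
		/* set .napi only if this NAPI is the sole consumer
		 * of the pool's pages
		 */
		.napi		= &priv->napi,
		.dma_dir	= DMA_FROM_DEVICE,
	};

	priv->pool = page_pool_create(&pp_params);
	if (IS_ERR(priv->pool))
		return PTR_ERR(priv->pool);
	return 0;
}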

drivers/net/ethernet/broadcom/bnxt/bnxt.c  +1 −0
@@ -3211,6 +3211,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,

 	pp.pool_size = bp->rx_ring_size;
 	pp.nid = dev_to_node(&bp->pdev->dev);
+	pp.napi = &rxr->bnapi->napi;
 	pp.dev = &bp->pdev->dev;
 	pp.dma_dir = DMA_BIDIRECTIONAL;

include/linux/netdevice.h  +3 −0
@@ -360,8 +360,11 @@ struct napi_struct {
 	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
+	/* CPU actively polling if netpoll is configured */
 	int			poll_owner;
 #endif
+	/* CPU on which NAPI has been scheduled for processing */
+	int			list_owner;
 	struct net_device	*dev;
 	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
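
[Editor's note: the new list_owner field records the CPU a NAPI was
scheduled on, which is what makes the "safely localized" test possible.
A hedged one-liner showing the intended use; the helper name is
hypothetical, while READ_ONCE() and smp_processor_id() are standard
kernel primitives.]

#include <linux/netdevice.h>

/* Hypothetical helper: true if the NAPI was last scheduled on the CPU
 * we are running on, so recycling directly into its lockless cache
 * cannot race with the pool's producer.
 */
static inline bool napi_local_to_this_cpu(const struct napi_struct *napi)
{
	return READ_ONCE(napi->list_owner) == smp_processor_id();
}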
include/linux/skbuff.h  +13 −7
@@ -3386,6 +3386,18 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
 	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
 }
 
+static inline void
+napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
+{
+	struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+	if (recycle && page_pool_return_skb_page(page, napi_safe))
+		return;
+#endif
+	put_page(page);
+}
+
 /**
  * __skb_frag_unref - release a reference on a paged fragment.
  * @frag: the paged fragment
@@ -3396,13 +3408,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
  */
 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-	struct page *page = skb_frag_page(frag);
-
-#ifdef CONFIG_PAGE_POOL
-	if (recycle && page_pool_return_skb_page(page))
-		return;
-#endif
-	put_page(page);
+	napi_frag_unref(frag, recycle, false);
 }
 
 /**
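
[Editor's note: for context, a hedged sketch of what the new napi_safe
argument gates on the page_pool side. Simplified from the series; the
pp_magic signature test and field names follow the page_pool code of
this period, but treat it as an approximation, not the exact patch.]

#include <net/page_pool.h>

bool page_pool_return_skb_page(struct page *page, bool napi_safe)
{
	struct napi_struct *napi;
	struct page_pool *pp;
	bool allow_direct;

	page = compound_head(page);

	/* pages owned by a page_pool carry a signature in pp_magic */
	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
		return false;

	pp = page->pp;

	/* Direct (lockless-cache) recycling is allowed only when the
	 * caller runs in NAPI context and the pool's sole-consumer
	 * NAPI was last scheduled on this very CPU.
	 */
	napi = READ_ONCE(pp->p.napi);
	allow_direct = napi_safe && napi &&
		       READ_ONCE(napi->list_owner) == smp_processor_id();

	page_pool_put_full_page(pp, page, allow_direct);
	return true;
}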
include/net/page_pool.h  +2 −1
@@ -77,6 +77,7 @@ struct page_pool_params {
 	unsigned int	pool_size;
 	int		nid;  /* Numa node id to allocate from pages from */
 	struct device	*dev; /* device, for DMA pre-mapping purposes */
+	struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
 	unsigned int	max_len; /* max DMA sync memory size */
 	unsigned int	offset;  /* DMA addr offset */
@@ -239,7 +240,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 	return pool->p.dma_dir;
 }
 
-bool page_pool_return_skb_page(struct page *page);
+bool page_pool_return_skb_page(struct page *page, bool napi_safe);
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
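
[Editor's note: finally, a hedged sketch of a napi_safe caller sitting
above the updated signature; skb_release_frags_napi() is a hypothetical
stand-in for the skb free-path plumbing the rest of the series adds.]

#include <linux/skbuff.h>

/* Hypothetical free-path fragment: an skb freed from inside a NAPI
 * poll method may pass napi_safe = true, while generic contexts
 * (like __skb_frag_unref() above) must pass false.
 */
static void skb_release_frags_napi(struct sk_buff *skb, bool napi_safe)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	for (i = 0; i < shinfo->nr_frags; i++)
		napi_frag_unref(&shinfo->frags[i], skb->pp_recycle,
				napi_safe);
}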
