Unverified Commit 19a9d13a authored by openeuler-ci-bot, committed by Gitee

!2670 page_pool: Add support for querying allocation stats and recycle stats

Merge Pull Request from: @svishen 
 
This PR backports the page_pool stats feature, which supports querying allocation stats and recycle stats.

page_pool_get_stats(): Retrieve statistics about the page_pool. This API is only available if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``. A pointer to a caller-allocated ``struct page_pool_stats`` structure is passed to this API, which fills it in. The caller can then report those stats to the user (perhaps via ethtool, debugfs, etc.).
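
A minimal usage sketch: because ``page_pool_get_stats()`` adds each pool's counters into the caller-provided structure (which the caller must zero-initialize), a driver with several pools can accumulate them into one report. The ``priv`` layout below is illustrative, not part of the API:

    struct page_pool_stats stats = { 0 };
    int i;

    /* each call accumulates the pool's counters into stats */
    for (i = 0; i < priv->num_pools; i++)
        page_pool_get_stats(priv->pools[i], &stats);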

issue:
https://gitee.com/openeuler/kernel/issues/I8BNYE 
 
Link: https://gitee.com/openeuler/kernel/pulls/2670

 

Reviewed-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents b03ca0f8 ced5cc6a
+56 −0
@@ -97,6 +97,47 @@ a page will cause no race conditions is enough.

* page_pool_get_dma_dir(): Retrieve the stored DMA direction.

* page_pool_get_stats(): Retrieve statistics about the page_pool. This API
  is only available if the kernel has been configured with
  ``CONFIG_PAGE_POOL_STATS=y``. A pointer to a caller-allocated ``struct
  page_pool_stats`` structure is passed to this API, which fills it in. The
  caller can then report those stats to the user (perhaps via ethtool,
  debugfs, etc.). See below for an example usage of this API.

Stats API and structures
------------------------
If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API
``page_pool_get_stats()`` and structures described below are available. It
takes a pointer to a ``struct page_pool`` and a pointer to a ``struct
page_pool_stats`` allocated by the caller.

The API will fill in the provided ``struct page_pool_stats`` with
statistics about the page_pool.

The stats structure has the following fields::

    struct page_pool_stats {
        struct page_pool_alloc_stats alloc_stats;
        struct page_pool_recycle_stats recycle_stats;
    };


The ``struct page_pool_alloc_stats`` has the following fields:
  * ``fast``: successful fast path allocations
  * ``slow``: slow path order-0 allocations
  * ``slow_high_order``: slow path high order allocations
  * ``empty``: ptr ring is empty, so a slow path allocation was forced.
  * ``refill``: an allocation which triggered a refill of the cache
  * ``waive``: pages obtained from the ptr ring that cannot be added to
    the cache due to a NUMA mismatch.

The ``struct page_pool_recycle_stats`` has the following fields:
  * ``cached``: recycling placed page in the page pool cache
  * ``cache_full``: page pool cache was full
  * ``ring``: page placed into the ptr ring
  * ``ring_full``: page released from page pool because the ptr ring was full
  * ``released_refcnt``: page released (and not recycled) because refcnt > 1
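
These counters are cumulative, so a consumer can compute deltas or ratios
between polls. A minimal sketch deriving the fast-path share of all
allocations (the helper is illustrative, not part of the kernel API):

.. code-block:: c

	/* percentage of allocations served from the pool's fast path;
	 * div64_u64() comes from <linux/math64.h>
	 */
	static u64 pp_fast_alloc_pct(const struct page_pool_stats *stats)
	{
		const struct page_pool_alloc_stats *as = &stats->alloc_stats;
		u64 total = as->fast + as->slow + as->slow_high_order;

		return total ? div64_u64(as->fast * 100, total) : 0;
	}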

Coding examples
===============

@@ -149,6 +190,21 @@ NAPI poller
        }
    }

Stats
-----

.. code-block:: c

	#ifdef CONFIG_PAGE_POOL_STATS
	/* retrieve stats */
	struct page_pool_stats stats = { 0 };
	if (page_pool_get_stats(page_pool, &stats)) {
		/* perhaps the driver reports statistics with ethtool */
		ethtool_print_allocation_stats(&stats.alloc_stats);
		ethtool_print_recycle_stats(&stats.recycle_stats);
	}
	#endif
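
The ethtool helpers introduced by this patch can back a driver's stringset
callbacks directly. A minimal sketch, assuming a hypothetical ``mydrv``
driver that keeps a single pool in its private data (all ``mydrv`` names
are illustrative):

.. code-block:: c

	static int mydrv_get_sset_count(struct net_device *dev, int sset)
	{
		if (sset == ETH_SS_STATS)
			return page_pool_ethtool_stats_get_count();
		return -EOPNOTSUPP;
	}

	static void mydrv_get_strings(struct net_device *dev, u32 sset, u8 *data)
	{
		if (sset == ETH_SS_STATS)
			page_pool_ethtool_stats_get_strings(data);
	}

	static void mydrv_get_ethtool_stats(struct net_device *dev,
					    struct ethtool_stats *e, u64 *data)
	{
		struct mydrv_priv *priv = netdev_priv(dev);
		struct page_pool_stats stats = { 0 };

		/* fill stats from the pool, then flatten into the u64 array */
		if (page_pool_get_stats(priv->page_pool, &stats))
			page_pool_ethtool_stats_get(data, &stats);
	}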

Driver unload
-------------

+77 −0
@@ -83,6 +83,79 @@ struct page_pool_params {
	unsigned int	offset;  /* DMA addr offset */
};

#ifdef CONFIG_PAGE_POOL_STATS
struct page_pool_alloc_stats {
	u64 fast; /* fast path allocations */
	u64 slow; /* slow-path order 0 allocations */
	u64 slow_high_order; /* slow-path high order allocations */
	u64 empty; /* failed refills due to empty ptr ring, forcing
		    * slow path allocation
		    */
	u64 refill; /* allocations via successful refill */
	u64 waive;  /* failed refills due to numa zone mismatch */
};

struct page_pool_recycle_stats {
	u64 cached;	/* recycling placed page in the cache. */
	u64 cache_full; /* cache was full */
	u64 ring;	/* recycling placed page back into ptr ring */
	u64 ring_full;	/* page was released from page-pool because
			 * PTR ring was full.
			 */
	u64 released_refcnt; /* page released because of elevated
			      * refcnt
			      */
};

/* This struct wraps the above stats structs so users of the
 * page_pool_get_stats API can pass a single argument when requesting the
 * stats for the page pool.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};

/* To avoid breaking KABI, introduce a new statistics structure that
 * stores the alloc_stats and recycle_stats members behind a single
 * pointer in struct page_pool.
 */
struct page_pool_raw_stats {
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
};

int page_pool_ethtool_stats_get_count(void);
u8 *page_pool_ethtool_stats_get_strings(u8 *data);
u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);

/*
 * Drivers that wish to harvest page pool stats and report them to users
 * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
 * struct page_pool_stats and call page_pool_get_stats() to get stats
 * for the specified pool.
 */
bool page_pool_get_stats(struct page_pool *pool,
			 struct page_pool_stats *stats);
#else

static inline int page_pool_ethtool_stats_get_count(void)
{
	return 0;
}

static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
{
	return data;
}

static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
{
	return data;
}

#endif

struct page_pool {
	struct page_pool_params p;

@@ -134,7 +207,11 @@ struct page_pool {

	u64 destroy_cnt;

#ifdef CONFIG_PAGE_POOL_STATS
	KABI_USE(1, struct page_pool_raw_stats *stats)
#else
	KABI_RESERVE(1)
#endif
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+13 −0
@@ -436,6 +436,19 @@ config NET_DEVLINK
config PAGE_POOL
	bool

config PAGE_POOL_STATS
	default n
	bool "Page pool stats"
	depends on PAGE_POOL
	help
	  Enable page pool statistics to track page allocation and recycling
	  in page pools. This option incurs additional CPU cost in allocation
	  and recycle paths and additional memory cost to store the statistics.
	  These statistics are only available if this option is enabled and if
	  the driver using the page pool supports exporting this data.

	  If unsure, say N.

config FAILOVER
	tristate "Generic failover module"
	help
+166 −10
@@ -26,6 +26,116 @@

#define BIAS_MAX	LONG_MAX

#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat)	(pool->stats->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
#define recycle_stat_inc(pool, __stat)							\
	do {										\
		struct page_pool_recycle_stats __percpu *s = pool->stats->recycle_stats;\
		this_cpu_inc(s->__stat);						\
	} while (0)

#define recycle_stat_add(pool, __stat, val)						\
	do {										\
		struct page_pool_recycle_stats __percpu *s = pool->stats->recycle_stats;\
		this_cpu_add(s->__stat, val);						\
	} while (0)

/* Workaround for the ETH_GSTRING_LEN macro: including the ethtool.h
 * header here would cause a KABI issue, so define a local equivalent
 * instead.
 */
#define PP_ETH_GSTRING_LEN 32
static const char pp_stats[][PP_ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};

bool page_pool_get_stats(struct page_pool *pool,
			 struct page_pool_stats *stats)
{
	int cpu = 0;

	if (!stats)
		return false;

	/* The caller is responsible for initializing stats. */
	stats->alloc_stats.fast += pool->stats->alloc_stats.fast;
	stats->alloc_stats.slow += pool->stats->alloc_stats.slow;
	stats->alloc_stats.slow_high_order += pool->stats->alloc_stats.slow_high_order;
	stats->alloc_stats.empty += pool->stats->alloc_stats.empty;
	stats->alloc_stats.refill += pool->stats->alloc_stats.refill;
	stats->alloc_stats.waive += pool->stats->alloc_stats.waive;

	for_each_possible_cpu(cpu) {
		const struct page_pool_recycle_stats *pcpu =
			per_cpu_ptr(pool->stats->recycle_stats, cpu);

		stats->recycle_stats.cached += pcpu->cached;
		stats->recycle_stats.cache_full += pcpu->cache_full;
		stats->recycle_stats.ring += pcpu->ring;
		stats->recycle_stats.ring_full += pcpu->ring_full;
		stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
	}

	return true;
}
EXPORT_SYMBOL(page_pool_get_stats);

u8 *page_pool_ethtool_stats_get_strings(u8 *data)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
		memcpy(data, pp_stats[i], PP_ETH_GSTRING_LEN);
		data += PP_ETH_GSTRING_LEN;
	}

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);

int page_pool_ethtool_stats_get_count(void)
{
	return ARRAY_SIZE(pp_stats);
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);

u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
{
	struct page_pool_stats *pool_stats = stats;

	*data++ = pool_stats->alloc_stats.fast;
	*data++ = pool_stats->alloc_stats.slow;
	*data++ = pool_stats->alloc_stats.slow_high_order;
	*data++ = pool_stats->alloc_stats.empty;
	*data++ = pool_stats->alloc_stats.refill;
	*data++ = pool_stats->alloc_stats.waive;
	*data++ = pool_stats->recycle_stats.cached;
	*data++ = pool_stats->recycle_stats.cache_full;
	*data++ = pool_stats->recycle_stats.ring;
	*data++ = pool_stats->recycle_stats.ring_full;
	*data++ = pool_stats->recycle_stats.released_refcnt;

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get);

#else
#define alloc_stat_inc(pool, __stat)
#define recycle_stat_inc(pool, __stat)
#define recycle_stat_add(pool, __stat, val)
#endif

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
@@ -75,8 +185,17 @@ static int page_pool_init(struct page_pool *pool,
		 */
	}

#ifdef CONFIG_PAGE_POOL_STATS
	pool->stats = kzalloc_node(sizeof(*pool->stats), GFP_KERNEL, params->nid);
	if (!pool->stats)
		return -ENOMEM;
	pool->stats->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
	if (!pool->stats->recycle_stats)
		goto out;
#endif

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
		goto out;

	atomic_set(&pool->pages_state_release_cnt, 0);

@@ -87,6 +206,13 @@ static int page_pool_init(struct page_pool *pool,
		get_device(pool->p.dev);

	return 0;
out:
#ifdef CONFIG_PAGE_POOL_STATS
	free_percpu(pool->stats->recycle_stats);
	kfree(pool->stats);
	pool->stats = NULL;
#endif
	return -ENOMEM;
}

struct page_pool *page_pool_create(const struct page_pool_params *params)
@@ -119,8 +245,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r)) {
		alloc_stat_inc(pool, empty);
		return NULL;
	}

	/* Softirq guarantee CPU and thus NUMA node is stable. This,
	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
@@ -150,14 +278,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			alloc_stat_inc(pool, waive);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, refill);
	}

	spin_unlock(&r->consumer_lock);
	return page;
@@ -172,6 +303,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
	if (likely(pool->alloc.count)) {
		/* Fast-path */
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, fast);
	} else {
		page = page_pool_refill_alloc_cache(pool);
	}
@@ -243,6 +375,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
		return NULL;
	}

	alloc_stat_inc(pool, slow_high_order);
	page_pool_set_pp_info(pool, page);

	/* Track how many pages are held 'in-flight' */
@@ -297,10 +430,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, slow);
	} else {
		page = NULL;
	}

	/* When a page has just been alloc'ed it should/must have refcnt 1. */
	return page;
@@ -398,7 +533,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	if (!ret) {
		recycle_stat_inc(pool, ring);
		return true;
	}

	return false;
}

/* Only allow direct recycling in special circumstances, into the
@@ -409,11 +549,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
		recycle_stat_inc(pool, cache_full);
		return false;
	}

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	recycle_stat_inc(pool, cached);
	return true;
}

@@ -468,6 +611,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
	 * doing refcnt based recycle tricks, meaning another process
	 * will be invoking put_page.
	 */
	recycle_stat_inc(pool, released_refcnt);
	/* Do not replace this with page_pool_return_page() */
	page_pool_release_page(pool, page);
	put_page(page);
@@ -481,6 +625,7 @@ void page_pool_put_page(struct page_pool *pool, struct page *page,
	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Ring full, fall back to freeing the page */
		recycle_stat_inc(pool, ring_full);
		page_pool_return_page(pool, page);
	}
}
@@ -507,9 +652,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
	/* Bulk producer into ptr_ring page_pool cache */
	page_pool_ring_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i])) {
			/* ring full */
			recycle_stat_inc(pool, ring_full);
			break;
		}
	}
	recycle_stat_add(pool, ring, i);
	page_pool_ring_unlock(pool);

	/* Hopefully all pages were returned into the ptr_ring */
@@ -575,9 +724,11 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,

	if (page && *offset + size > max_size) {
		page = page_pool_drain_frag(pool, page);
		if (page) {
			alloc_stat_inc(pool, fast);
			goto frag_reset;
		}
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
@@ -598,6 +749,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,

	pool->frag_users++;
	pool->frag_offset = *offset + size;
	alloc_stat_inc(pool, fast);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
@@ -627,6 +779,10 @@ static void page_pool_free(struct page_pool *pool)
	if (pool->p.flags & PP_FLAG_DMA_MAP)
		put_device(pool->p.dev);

#ifdef CONFIG_PAGE_POOL_STATS
	free_percpu(pool->stats->recycle_stats);
	kfree(pool->stats);
#endif
	kfree(pool);
}