Commit 7170debe authored by David S. Miller

Merge branch 'mvneta-xdp'



Lorenzo Bianconi says:

====================
add XDP support to mvneta driver

Add XDP support to the mvneta driver for devices that rely on software
buffer management. Supported verdicts are:
- XDP_DROP
- XDP_PASS
- XDP_REDIRECT
- XDP_TX
Moreover, set the ndo_xdp_xmit net_device_ops function pointer in order
to support redirects from other devices (e.g. virtio-net).
Convert the mvneta driver to the page_pool API.
This series is based on previous work done by Jesper and Ilias.
We will send follow-up patches to reduce DMA-sync operations.
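
For reference, here is a minimal sketch of an XDP program that exercises
the verdicts listed above. It is purely illustrative and not part of this
series; the devmap name, slot index and program name are hypothetical:

  #include <linux/bpf.h>
  #include <linux/if_ether.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_endian.h>

  /* illustrative devmap used as a target for XDP_REDIRECT */
  struct {
          __uint(type, BPF_MAP_TYPE_DEVMAP);
          __uint(key_size, sizeof(int));
          __uint(value_size, sizeof(int));
          __uint(max_entries, 8);
  } tx_ports SEC(".maps");

  SEC("xdp")
  int mvneta_xdp_example(struct xdp_md *ctx)
  {
          void *data_end = (void *)(long)ctx->data_end;
          void *data = (void *)(long)ctx->data;
          struct ethhdr *eth = data;

          if (data + sizeof(*eth) > data_end)
                  return XDP_DROP;        /* malformed frame */

          if (eth->h_proto == bpf_htons(ETH_P_ARP))
                  return XDP_TX;          /* bounce out of the same port */

          if (eth->h_proto == bpf_htons(ETH_P_IP))
                  /* returns XDP_REDIRECT on success, via devmap slot 0 */
                  return bpf_redirect_map(&tx_ports, 0, 0);

          return XDP_PASS;                /* hand everything else to the stack */
  }

  char _license[] SEC("license") = "GPL";

Frames redirected in the opposite direction (e.g. from virtio-net towards
mvneta) are transmitted through the new ndo_xdp_xmit callback.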

Changes since v4:
- reset the page_pool pointer to NULL in mvneta_rxq_drop_pkts and in the
  mvneta_create_page_pool error path
- move the DMA sync into mvneta_rx_refill() in patch 2/7
- verify bpf prog pointer in mvneta_xdp_setup to double-check if
  stop/start is really necessary
- coding style fixes

Changes since v3:
- rename MVNETA_XDP_CONSUMED to MVNETA_XDP_DROPPED
- squash patch 4/8 and patch 3/8
- fix dma sync for XDP_TX verdict
- fix queue_index in xdp_rxq_info_reg
- cosmetics

Changes since v2:
- rely on page_pool_recycle_direct instead of xdp_return_buff for XDP_DROP
- define xdp buffer in mvneta_rx_swbm and avoid default initializations
- use dma_sync_single_for_cpu instead of dma_sync_single_range_for_cpu
- run page_pool_release_page in mvneta_swbm_add_rx_fragment even if
  the buffer contains just ETH_FCS

Changes since v1:
- sync dma buffers before refilling hw queues
- fix stats accounting

Changes since RFC:
- implement XDP_TX
- make tx pending buffer list agnostic
- code refactoring
- check if device is running in mvneta_xdp_setup
====================

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 13faf771 b0a43db9
drivers/net/ethernet/marvell/Kconfig  +1 −0
@@ -61,6 +61,7 @@ config MVNETA
	depends on ARCH_MVEBU || COMPILE_TEST
	select MVMDIO
	select PHYLINK
	select PAGE_POOL
	---help---
	  This driver supports the network interface units in the
	  Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
drivers/net/ethernet/marvell/mvneta.c  +469 −149
@@ -37,6 +37,8 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tso.h>
#include <net/page_pool.h>
#include <linux/bpf_trace.h>

/* Registers */
#define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
@@ -322,6 +324,13 @@
	      ETH_HLEN + ETH_FCS_LEN,			     \
	      cache_line_size())

#define MVNETA_SKB_HEADROOM	(max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
				 NET_IP_ALIGN)
#define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
			 MVNETA_SKB_HEADROOM))
#define MVNETA_SKB_SIZE(len)	(SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
#define MVNETA_MAX_RX_BUF_SIZE	(PAGE_SIZE - MVNETA_SKB_PAD)

#define IS_TSO_HEADER(txq, addr) \
	((addr >= txq->tso_hdrs_phys) && \
	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
@@ -346,6 +355,11 @@ struct mvneta_statistic {
#define T_REG_64	64
#define T_SW		1

#define MVNETA_XDP_PASS		BIT(0)
#define MVNETA_XDP_DROPPED	BIT(1)
#define MVNETA_XDP_TX		BIT(2)
#define MVNETA_XDP_REDIR	BIT(3)

static const struct mvneta_statistic mvneta_statistics[] = {
	{ 0x3000, T_REG_64, "good_octets_received", },
	{ 0x3010, T_REG_32, "good_frames_received", },
@@ -425,6 +439,8 @@ struct mvneta_port {
	u32 cause_rx_tx;
	struct napi_struct napi;

	struct bpf_prog *xdp_prog;

	/* Core clock */
	struct clk *clk;
	/* AXI clock */
@@ -545,6 +561,20 @@ struct mvneta_rx_desc {
};
#endif

enum mvneta_tx_buf_type {
	MVNETA_TYPE_SKB,
	MVNETA_TYPE_XDP_TX,
	MVNETA_TYPE_XDP_NDO,
};

struct mvneta_tx_buf {
	enum mvneta_tx_buf_type type;
	union {
		struct xdp_frame *xdpf;
		struct sk_buff *skb;
	};
};

struct mvneta_tx_queue {
	/* Number of this TX queue, in the range 0-7 */
	u8 id;
@@ -560,8 +590,8 @@ struct mvneta_tx_queue {
	int tx_stop_threshold;
	int tx_wake_threshold;

	/* Array of transmitted skb */
	struct sk_buff **tx_skb;
	/* Array of transmitted buffers */
	struct mvneta_tx_buf *buf;

	/* Index of last TX DMA descriptor that was inserted */
	int txq_put_index;
@@ -603,6 +633,10 @@ struct mvneta_rx_queue {
	u32 pkts_coal;
	u32 time_coal;

	/* page_pool */
	struct page_pool *page_pool;
	struct xdp_rxq_info xdp_rxq;

	/* Virtual address of the RX buffer */
	void  **buf_virt_addr;

@@ -641,7 +675,6 @@ static int txq_number = 8;
static int rxq_def;

static int rx_copybreak __read_mostly = 256;
static int rx_header_size __read_mostly = 128;

/* HW BM need that each port be identify by a unique ID */
static int global_port_id;
@@ -1761,24 +1794,25 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
	int i;

	for (i = 0; i < num; i++) {
		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
		struct mvneta_tx_desc *tx_desc = txq->descs +
			txq->txq_get_index;
		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];

		if (skb) {
			bytes_compl += skb->len;
			pkts_compl++;
		}

		mvneta_txq_inc_get(txq);

		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) &&
		    buf->type != MVNETA_TYPE_XDP_TX)
			dma_unmap_single(pp->dev->dev.parent,
					 tx_desc->buf_phys_addr,
					 tx_desc->data_size, DMA_TO_DEVICE);
		if (!skb)
			continue;
		dev_kfree_skb_any(skb);
		if (buf->type == MVNETA_TYPE_SKB && buf->skb) {
			bytes_compl += buf->skb->len;
			pkts_compl++;
			dev_kfree_skb_any(buf->skb);
		} else if (buf->type == MVNETA_TYPE_XDP_TX ||
			   buf->type == MVNETA_TYPE_XDP_NDO) {
			xdp_return_frame(buf->xdpf);
		}
	}

	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
@@ -1812,23 +1846,21 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
			    struct mvneta_rx_queue *rxq,
			    gfp_t gfp_mask)
{
	enum dma_data_direction dma_dir;
	dma_addr_t phys_addr;
	struct page *page;

	page = __dev_alloc_page(gfp_mask);
	page = page_pool_alloc_pages(rxq->page_pool,
				     gfp_mask | __GFP_NOWARN);
	if (!page)
		return -ENOMEM;

	/* map page for use */
	phys_addr = dma_map_page(pp->dev->dev.parent, page, 0, PAGE_SIZE,
				 DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
		__free_page(page);
		return -ENOMEM;
	}

	phys_addr += pp->rx_offset_correction;
	phys_addr = page_pool_get_dma_addr(page) + pp->rx_offset_correction;
	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
	dma_sync_single_for_device(pp->dev->dev.parent, phys_addr,
				   MVNETA_MAX_RX_BUF_SIZE, dma_dir);
	mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);

	return 0;
}

@@ -1894,10 +1926,29 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
		if (!data || !(rx_desc->buf_phys_addr))
			continue;

		dma_unmap_page(pp->dev->dev.parent, rx_desc->buf_phys_addr,
			       PAGE_SIZE, DMA_FROM_DEVICE);
		__free_page(data);
		page_pool_put_page(rxq->page_pool, data, false);
	}
	if (xdp_rxq_info_is_reg(&rxq->xdp_rxq))
		xdp_rxq_info_unreg(&rxq->xdp_rxq);
	page_pool_destroy(rxq->page_pool);
	rxq->page_pool = NULL;
}

static void
mvneta_update_stats(struct mvneta_port *pp, u32 pkts,
		    u32 len, bool tx)
{
	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);

	u64_stats_update_begin(&stats->syncp);
	if (tx) {
		stats->tx_packets += pkts;
		stats->tx_bytes += len;
	} else {
		stats->rx_packets += pkts;
		stats->rx_bytes += len;
	}
	u64_stats_update_end(&stats->syncp);
}

static inline
@@ -1925,43 +1976,292 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
	return i;
}

static int
mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
			struct xdp_frame *xdpf, bool dma_map)
{
	struct mvneta_tx_desc *tx_desc;
	struct mvneta_tx_buf *buf;
	dma_addr_t dma_addr;

	if (txq->count >= txq->tx_stop_threshold)
		return MVNETA_XDP_DROPPED;

	tx_desc = mvneta_txq_next_desc_get(txq);

	buf = &txq->buf[txq->txq_put_index];
	if (dma_map) {
		/* ndo_xdp_xmit */
		dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
					  xdpf->len, DMA_TO_DEVICE);
		if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
			mvneta_txq_desc_put(txq);
			return MVNETA_XDP_DROPPED;
		}
		buf->type = MVNETA_TYPE_XDP_NDO;
	} else {
		struct page *page = virt_to_page(xdpf->data);

		dma_addr = page_pool_get_dma_addr(page) +
			   sizeof(*xdpf) + xdpf->headroom;
		dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
					   xdpf->len, DMA_BIDIRECTIONAL);
		buf->type = MVNETA_TYPE_XDP_TX;
	}
	buf->xdpf = xdpf;

	tx_desc->command = MVNETA_TXD_FLZ_DESC;
	tx_desc->buf_phys_addr = dma_addr;
	tx_desc->data_size = xdpf->len;

	mvneta_update_stats(pp, 1, xdpf->len, true);
	mvneta_txq_inc_put(txq);
	txq->pending++;
	txq->count++;

	return MVNETA_XDP_TX;
}

static int
mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
{
	struct mvneta_tx_queue *txq;
	struct netdev_queue *nq;
	struct xdp_frame *xdpf;
	int cpu;
	u32 ret;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return MVNETA_XDP_DROPPED;

	cpu = smp_processor_id();
	txq = &pp->txqs[cpu % txq_number];
	nq = netdev_get_tx_queue(pp->dev, txq->id);

	__netif_tx_lock(nq, cpu);
	ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
	if (ret == MVNETA_XDP_TX)
		mvneta_txq_pend_desc_add(pp, txq, 0);
	__netif_tx_unlock(nq);

	return ret;
}

static int
mvneta_xdp_xmit(struct net_device *dev, int num_frame,
		struct xdp_frame **frames, u32 flags)
{
	struct mvneta_port *pp = netdev_priv(dev);
	int cpu = smp_processor_id();
	struct mvneta_tx_queue *txq;
	struct netdev_queue *nq;
	int i, drops = 0;
	u32 ret;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	txq = &pp->txqs[cpu % txq_number];
	nq = netdev_get_tx_queue(pp->dev, txq->id);

	__netif_tx_lock(nq, cpu);
	for (i = 0; i < num_frame; i++) {
		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
		if (ret != MVNETA_XDP_TX) {
			xdp_return_frame_rx_napi(frames[i]);
			drops++;
		}
	}

	if (unlikely(flags & XDP_XMIT_FLUSH))
		mvneta_txq_pend_desc_add(pp, txq, 0);
	__netif_tx_unlock(nq);

	return num_frame - drops;
}

static int
mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
	       struct bpf_prog *prog, struct xdp_buff *xdp)
{
	u32 ret, act = bpf_prog_run_xdp(prog, xdp);

	switch (act) {
	case XDP_PASS:
		ret = MVNETA_XDP_PASS;
		break;
	case XDP_REDIRECT: {
		int err;

		err = xdp_do_redirect(pp->dev, xdp, prog);
		if (err) {
			ret = MVNETA_XDP_DROPPED;
			xdp_return_buff(xdp);
		} else {
			ret = MVNETA_XDP_REDIR;
		}
		break;
	}
	case XDP_TX:
		ret = mvneta_xdp_xmit_back(pp, xdp);
		if (ret != MVNETA_XDP_TX)
			xdp_return_buff(xdp);
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(pp->dev, prog, act);
		/* fall through */
	case XDP_DROP:
		page_pool_recycle_direct(rxq->page_pool,
					 virt_to_head_page(xdp->data));
		ret = MVNETA_XDP_DROPPED;
		break;
	}

	return ret;
}

static int
mvneta_swbm_rx_frame(struct mvneta_port *pp,
		     struct mvneta_rx_desc *rx_desc,
		     struct mvneta_rx_queue *rxq,
		     struct xdp_buff *xdp,
		     struct bpf_prog *xdp_prog,
		     struct page *page, u32 *xdp_ret)
{
	unsigned char *data = page_address(page);
	int data_len = -MVNETA_MH_SIZE, len;
	struct net_device *dev = pp->dev;
	enum dma_data_direction dma_dir;

	if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
		len = MVNETA_MAX_RX_BUF_SIZE;
		data_len += len;
	} else {
		len = rx_desc->data_size;
		data_len += len - ETH_FCS_LEN;
	}

	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
	dma_sync_single_for_cpu(dev->dev.parent,
				rx_desc->buf_phys_addr,
				len, dma_dir);

	/* Prefetch header */
	prefetch(data);

	xdp->data_hard_start = data;
	xdp->data = data + MVNETA_SKB_HEADROOM + MVNETA_MH_SIZE;
	xdp->data_end = xdp->data + data_len;
	xdp_set_data_meta_invalid(xdp);

	if (xdp_prog) {
		u32 ret;

		ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp);
		if (ret != MVNETA_XDP_PASS) {
			mvneta_update_stats(pp, 1,
					    xdp->data_end - xdp->data,
					    false);
			rx_desc->buf_phys_addr = 0;
			*xdp_ret |= ret;
			return ret;
		}
	}

	rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
	if (unlikely(!rxq->skb)) {
		netdev_err(dev,
			   "Can't allocate skb on queue %d\n",
			   rxq->id);
		dev->stats.rx_dropped++;
		rxq->skb_alloc_err++;
		return -ENOMEM;
	}
	page_pool_release_page(rxq->page_pool, page);

	skb_reserve(rxq->skb,
		    xdp->data - xdp->data_hard_start);
	skb_put(rxq->skb, xdp->data_end - xdp->data);
	mvneta_rx_csum(pp, rx_desc->status, rxq->skb);

	rxq->left_size = rx_desc->data_size - len;
	rx_desc->buf_phys_addr = 0;

	return 0;
}

static void
mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
			    struct mvneta_rx_desc *rx_desc,
			    struct mvneta_rx_queue *rxq,
			    struct page *page)
{
	struct net_device *dev = pp->dev;
	enum dma_data_direction dma_dir;
	int data_len, len;

	if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) {
		len = MVNETA_MAX_RX_BUF_SIZE;
		data_len = len;
	} else {
		len = rxq->left_size;
		data_len = len - ETH_FCS_LEN;
	}
	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
	dma_sync_single_for_cpu(dev->dev.parent,
				rx_desc->buf_phys_addr,
				len, dma_dir);
	if (data_len > 0) {
		/* refill descriptor with new buffer later */
		skb_add_rx_frag(rxq->skb,
				skb_shinfo(rxq->skb)->nr_frags,
				page, MVNETA_SKB_HEADROOM, data_len,
				PAGE_SIZE);
	}
	page_pool_release_page(rxq->page_pool, page);
	rx_desc->buf_phys_addr = 0;
	rxq->left_size -= len;
}

/* Main rx processing when using software buffer management */
static int mvneta_rx_swbm(struct napi_struct *napi,
			  struct mvneta_port *pp, int budget,
			  struct mvneta_rx_queue *rxq)
{
	int rcvd_pkts = 0, rcvd_bytes = 0, rx_proc = 0;
	struct net_device *dev = pp->dev;
	int rx_todo, rx_proc;
	int refill = 0;
	u32 rcvd_pkts = 0;
	u32 rcvd_bytes = 0;
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp_buf;
	int rx_todo, refill;
	u32 xdp_ret = 0;

	/* Get number of received packets */
	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
	rx_proc = 0;

	rcu_read_lock();
	xdp_prog = READ_ONCE(pp->xdp_prog);
	xdp_buf.rxq = &rxq->xdp_rxq;

	/* Fairness NAPI loop */
	while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
	while (rx_proc < budget && rx_proc < rx_todo) {
		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
		unsigned char *data;
		struct page *page;
		dma_addr_t phys_addr;
		u32 rx_status, index;
		int rx_bytes, skb_size, copy_size;
		int frag_num, frag_size, frag_offset;
		struct page *page;

		index = rx_desc - rxq->descs;
		page = (struct page *)rxq->buf_virt_addr[index];
		data = page_address(page);
		/* Prefetch header */
		prefetch(data);

		phys_addr = rx_desc->buf_phys_addr;
		rx_status = rx_desc->status;
		rx_proc++;
		rxq->refill_num++;

		if (rx_status & MVNETA_RXD_FIRST_DESC) {
			int err;

			/* Check errors only for FIRST descriptor */
			if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
				mvneta_rx_error(pp, rx_desc);
@@ -1969,85 +2269,18 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
				/* leave the descriptor untouched */
				continue;
			}
			rx_bytes = rx_desc->data_size -
				   (ETH_FCS_LEN + MVNETA_MH_SIZE);

			/* Allocate small skb for each new packet */
			skb_size = max(rx_copybreak, rx_header_size);
			rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
			if (unlikely(!rxq->skb)) {
				netdev_err(dev,
					   "Can't allocate skb on queue %d\n",
					   rxq->id);
				dev->stats.rx_dropped++;
				rxq->skb_alloc_err++;
			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
						   xdp_prog, page, &xdp_ret);
			if (err)
				continue;
			}
			copy_size = min(skb_size, rx_bytes);

			/* Copy data from buffer to SKB, skip Marvell header */
			memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
			       copy_size);
			skb_put(rxq->skb, copy_size);
			rxq->left_size = rx_bytes - copy_size;

			mvneta_rx_csum(pp, rx_status, rxq->skb);
			if (rxq->left_size == 0) {
				int size = copy_size + MVNETA_MH_SIZE;

				dma_sync_single_range_for_cpu(dev->dev.parent,
							      phys_addr, 0,
							      size,
							      DMA_FROM_DEVICE);

				/* leave the descriptor and buffer untouched */
			} else {
				/* refill descriptor with new buffer later */
				rx_desc->buf_phys_addr = 0;

				frag_num = 0;
				frag_offset = copy_size + MVNETA_MH_SIZE;
				frag_size = min(rxq->left_size,
						(int)(PAGE_SIZE - frag_offset));
				skb_add_rx_frag(rxq->skb, frag_num, page,
						frag_offset, frag_size,
						PAGE_SIZE);
				dma_unmap_page(dev->dev.parent, phys_addr,
					       PAGE_SIZE, DMA_FROM_DEVICE);
				rxq->left_size -= frag_size;
			}
		} else {
			/* Middle or Last descriptor */
			if (unlikely(!rxq->skb)) {
				pr_debug("no skb for rx_status 0x%x\n",
					 rx_status);
				continue;
			}
			if (!rxq->left_size) {
				/* last descriptor has only FCS */
				/* and can be discarded */
				dma_sync_single_range_for_cpu(dev->dev.parent,
							      phys_addr, 0,
							      ETH_FCS_LEN,
							      DMA_FROM_DEVICE);
				/* leave the descriptor and buffer untouched */
			} else {
				/* refill descriptor with new buffer later */
				rx_desc->buf_phys_addr = 0;

				frag_num = skb_shinfo(rxq->skb)->nr_frags;
				frag_offset = 0;
				frag_size = min(rxq->left_size,
						(int)(PAGE_SIZE - frag_offset));
				skb_add_rx_frag(rxq->skb, frag_num, page,
						frag_offset, frag_size,
						PAGE_SIZE);

				dma_unmap_page(dev->dev.parent, phys_addr,
					       PAGE_SIZE, DMA_FROM_DEVICE);

				rxq->left_size -= frag_size;
			}
			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page);
		} /* Middle or Last descriptor */

		if (!(rx_status & MVNETA_RXD_LAST_DESC))
@@ -2072,17 +2305,14 @@ static int mvneta_rx_swbm(struct napi_struct *napi,

		/* clean uncomplete skb pointer in queue */
		rxq->skb = NULL;
		rxq->left_size = 0;
	}
	rcu_read_unlock();

	if (rcvd_pkts) {
		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
	if (xdp_ret & MVNETA_XDP_REDIR)
		xdp_do_flush_map();

		u64_stats_update_begin(&stats->syncp);
		stats->rx_packets += rcvd_pkts;
		stats->rx_bytes   += rcvd_bytes;
		u64_stats_update_end(&stats->syncp);
	}
	if (rcvd_pkts)
		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);

	/* return some buffers to hardware queue, one at a time is too slow */
	refill = mvneta_rx_refill_queue(pp, rxq);
@@ -2206,14 +2436,8 @@ static int mvneta_rx_hwbm(struct napi_struct *napi,
		napi_gro_receive(napi, skb);
	}

	if (rcvd_pkts) {
		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);

		u64_stats_update_begin(&stats->syncp);
		stats->rx_packets += rcvd_pkts;
		stats->rx_bytes   += rcvd_bytes;
		u64_stats_update_end(&stats->syncp);
	}
	if (rcvd_pkts)
		mvneta_update_stats(pp, rcvd_pkts, rcvd_bytes, false);

	/* Update rxq management counters */
	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
@@ -2225,16 +2449,19 @@ static inline void
mvneta_tso_put_hdr(struct sk_buff *skb,
		   struct mvneta_port *pp, struct mvneta_tx_queue *txq)
{
	struct mvneta_tx_desc *tx_desc;
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
	struct mvneta_tx_desc *tx_desc;

	txq->tx_skb[txq->txq_put_index] = NULL;
	tx_desc = mvneta_txq_next_desc_get(txq);
	tx_desc->data_size = hdr_len;
	tx_desc->command = mvneta_skb_tx_csum(pp, skb);
	tx_desc->command |= MVNETA_TXD_F_DESC;
	tx_desc->buf_phys_addr = txq->tso_hdrs_phys +
				 txq->txq_put_index * TSO_HEADER_SIZE;
	buf->type = MVNETA_TYPE_SKB;
	buf->skb = NULL;

	mvneta_txq_inc_put(txq);
}

@@ -2243,6 +2470,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
		    struct sk_buff *skb, char *data, int size,
		    bool last_tcp, bool is_last)
{
	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
	struct mvneta_tx_desc *tx_desc;

	tx_desc = mvneta_txq_next_desc_get(txq);
@@ -2256,7 +2484,8 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,
	}

	tx_desc->command = 0;
	txq->tx_skb[txq->txq_put_index] = NULL;
	buf->type = MVNETA_TYPE_SKB;
	buf->skb = NULL;

	if (last_tcp) {
		/* last descriptor in the TCP packet */
@@ -2264,7 +2493,7 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq,

		/* last descriptor in SKB */
		if (is_last)
			txq->tx_skb[txq->txq_put_index] = skb;
			buf->skb = skb;
	}
	mvneta_txq_inc_put(txq);
	return 0;
@@ -2349,6 +2578,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
	int i, nr_frags = skb_shinfo(skb)->nr_frags;

	for (i = 0; i < nr_frags; i++) {
		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		void *addr = skb_frag_address(frag);

@@ -2368,12 +2598,13 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb,
		if (i == nr_frags - 1) {
			/* Last descriptor */
			tx_desc->command = MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
			txq->tx_skb[txq->txq_put_index] = skb;
			buf->skb = skb;
		} else {
			/* Descriptor in the middle: Not First, Not Last */
			tx_desc->command = 0;
			txq->tx_skb[txq->txq_put_index] = NULL;
			buf->skb = NULL;
		}
		buf->type = MVNETA_TYPE_SKB;
		mvneta_txq_inc_put(txq);
	}

@@ -2401,6 +2632,7 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
	struct mvneta_port *pp = netdev_priv(dev);
	u16 txq_id = skb_get_queue_mapping(skb);
	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
	struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
	struct mvneta_tx_desc *tx_desc;
	int len = skb->len;
	int frags = 0;
@@ -2433,16 +2665,17 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
		goto out;
	}

	buf->type = MVNETA_TYPE_SKB;
	if (frags == 1) {
		/* First and Last descriptor */
		tx_cmd |= MVNETA_TXD_FLZ_DESC;
		tx_desc->command = tx_cmd;
		txq->tx_skb[txq->txq_put_index] = skb;
		buf->skb = skb;
		mvneta_txq_inc_put(txq);
	} else {
		/* First but not Last */
		tx_cmd |= MVNETA_TXD_F_DESC;
		txq->tx_skb[txq->txq_put_index] = NULL;
		buf->skb = NULL;
		mvneta_txq_inc_put(txq);
		tx_desc->command = tx_cmd;
		/* Continue with other skb fragments */
@@ -2459,7 +2692,6 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)

out:
	if (frags > 0) {
		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);

		netdev_tx_sent_queue(nq, len);
@@ -2474,10 +2706,7 @@ static netdev_tx_t mvneta_tx(struct sk_buff *skb, struct net_device *dev)
		else
			txq->pending += frags;

		u64_stats_update_begin(&stats->syncp);
		stats->tx_packets++;
		stats->tx_bytes  += len;
		u64_stats_update_end(&stats->syncp);
		mvneta_update_stats(pp, 1, len, true);
	} else {
		dev->stats.tx_dropped++;
		dev_kfree_skb_any(skb);
@@ -2830,11 +3059,55 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
	return rx_done;
}

static int mvneta_create_page_pool(struct mvneta_port *pp,
				   struct mvneta_rx_queue *rxq, int size)
{
	struct bpf_prog *xdp_prog = READ_ONCE(pp->xdp_prog);
	struct page_pool_params pp_params = {
		.order = 0,
		.flags = PP_FLAG_DMA_MAP,
		.pool_size = size,
		.nid = cpu_to_node(0),
		.dev = pp->dev->dev.parent,
		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
	};
	int err;

	rxq->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(rxq->page_pool)) {
		err = PTR_ERR(rxq->page_pool);
		rxq->page_pool = NULL;
		return err;
	}

	err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id);
	if (err < 0)
		goto err_free_pp;

	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 rxq->page_pool);
	if (err)
		goto err_unregister_rxq;

	return 0;

err_unregister_rxq:
	xdp_rxq_info_unreg(&rxq->xdp_rxq);
err_free_pp:
	page_pool_destroy(rxq->page_pool);
	rxq->page_pool = NULL;
	return err;
}

/* Handle rxq fill: allocates rxq skbs; called when initializing a port */
static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
			   int num)
{
	int i;
	int i, err;

	err = mvneta_create_page_pool(pp, rxq, num);
	if (err < 0)
		return err;

	for (i = 0; i < num; i++) {
		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
@@ -2908,7 +3181,7 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
		/* Set Offset */
		mvneta_rxq_offset_set(pp, rxq, 0);
		mvneta_rxq_buf_size_set(pp, rxq, PAGE_SIZE < SZ_64K ?
					PAGE_SIZE :
					MVNETA_MAX_RX_BUF_SIZE :
					MVNETA_RX_BUF_SIZE(pp->pkt_size));
		mvneta_rxq_bm_disable(pp, rxq);
		mvneta_rxq_fill(pp, rxq, rxq->size);
@@ -2989,9 +3262,8 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,

	txq->last_desc = txq->size - 1;

	txq->tx_skb = kmalloc_array(txq->size, sizeof(*txq->tx_skb),
				    GFP_KERNEL);
	if (!txq->tx_skb) {
	txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
	if (!txq->buf) {
		dma_free_coherent(pp->dev->dev.parent,
				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
				  txq->descs, txq->descs_phys);
@@ -3003,7 +3275,7 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
					   txq->size * TSO_HEADER_SIZE,
					   &txq->tso_hdrs_phys, GFP_KERNEL);
	if (!txq->tso_hdrs) {
		kfree(txq->tx_skb);
		kfree(txq->buf);
		dma_free_coherent(pp->dev->dev.parent,
				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
				  txq->descs, txq->descs_phys);
@@ -3056,7 +3328,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
{
	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);

	kfree(txq->tx_skb);
	kfree(txq->buf);

	if (txq->tso_hdrs)
		dma_free_coherent(pp->dev->dev.parent,
@@ -3263,6 +3535,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
	}

	if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
		netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
		return -EINVAL;
	}

	dev->mtu = mtu;

	if (!netif_running(dev)) {
@@ -3932,6 +4209,47 @@ static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
	return phylink_mii_ioctl(pp->phylink, ifr, cmd);
}

static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
			    struct netlink_ext_ack *extack)
{
	bool need_update, running = netif_running(dev);
	struct mvneta_port *pp = netdev_priv(dev);
	struct bpf_prog *old_prog;

	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
		return -EOPNOTSUPP;
	}

	need_update = !!pp->xdp_prog != !!prog;
	if (running && need_update)
		mvneta_stop(dev);

	old_prog = xchg(&pp->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

	if (running && need_update)
		return mvneta_open(dev);

	return 0;
}

static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct mvneta_port *pp = netdev_priv(dev);

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
	case XDP_QUERY_PROG:
		xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
		return 0;
	default:
		return -EINVAL;
	}
}

/* Ethtool methods */

/* Set link ksettings (phy address, speed) for ethtools */
@@ -4328,6 +4646,8 @@ static const struct net_device_ops mvneta_netdev_ops = {
	.ndo_fix_features    = mvneta_fix_features,
	.ndo_get_stats64     = mvneta_get_stats64,
	.ndo_do_ioctl        = mvneta_ioctl,
	.ndo_bpf	     = mvneta_xdp,
	.ndo_xdp_xmit        = mvneta_xdp_xmit,
};

static const struct ethtool_ops mvneta_eth_tool_ops = {
@@ -4618,7 +4938,7 @@ static int mvneta_probe(struct platform_device *pdev)
	SET_NETDEV_DEV(dev, &pdev->dev);

	pp->id = global_port_id++;
	pp->rx_offset_correction = 0; /* not relevant for SW BM */
	pp->rx_offset_correction = MVNETA_SKB_HEADROOM;

	/* Obtain access to BM resources if enabled and already initialized */
	bm_node = of_parse_phandle(dn, "buffer-manager", 0);