Commit 20f79739 authored by Wei Fang, committed by Paolo Abeni

net: fec: recycle pages for transmitted XDP frames



Once XDP frames have been successfully transmitted through the
ndo_xdp_xmit() interface, it is the driver's responsibility to free
the frames so that the page_pool can recycle the pages and reuse them.
However, the fec driver does not do this, which leads to a user-visible
problem: the console prints warnings such as the following.

[  157.568851] page_pool_release_retry() stalled pool shutdown 1389 inflight 60 sec
[  217.983446] page_pool_release_retry() stalled pool shutdown 1389 inflight 120 sec
[  278.399006] page_pool_release_retry() stalled pool shutdown 1389 inflight 181 sec
[  338.812885] page_pool_release_retry() stalled pool shutdown 1389 inflight 241 sec
[  399.226946] page_pool_release_retry() stalled pool shutdown 1389 inflight 302 sec

Therefore, to solve this issue, we free XDP frames via xdp_return_frame()
while cleaning the tx BD ring.
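
The idea is illustrated by the minimal, hypothetical sketch below (it is
not the fec driver's actual code; the my_tx_buf/my_clean_tx_ring names
are made up for the example): each TX ring slot records whether it
carried an skb or an XDP frame, and the completion path hands XDP frames
back with xdp_return_frame() so the page_pool can recycle their pages.

#include <linux/netdevice.h>
#include <net/xdp.h>

enum my_txbuf_type {
	MY_TXBUF_SKB,
	MY_TXBUF_XDP_NDO,
};

struct my_tx_buf {
	union {
		struct sk_buff *skb;
		struct xdp_frame *xdpf;
	};
	enum my_txbuf_type type;
};

/* Walk the TX ring and release whatever each completed slot holds. */
static void my_clean_tx_ring(struct my_tx_buf *ring, unsigned int size)
{
	unsigned int i;

	for (i = 0; i < size; i++) {
		struct my_tx_buf *buf = &ring[i];

		if (buf->type == MY_TXBUF_SKB) {
			if (buf->skb) {
				dev_kfree_skb_any(buf->skb);
				buf->skb = NULL;
			}
		} else if (buf->xdpf) {
			/* Return the frame so the page_pool can recycle its
			 * page; otherwise the page stays "inflight" and
			 * page_pool_release_retry() warns at pool shutdown.
			 */
			xdp_return_frame(buf->xdpf);
			buf->xdpf = NULL;
			buf->type = MY_TXBUF_SKB;	/* restore default type */
		}
	}
}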

Fixes: 6d6b39f1 ("net: fec: add initial XDP support")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent be7ecbe7
drivers/net/ethernet/freescale/fec.h  +14 −1
@@ -544,10 +544,23 @@ enum {
 	XDP_STATS_TOTAL,
 };
 
+enum fec_txbuf_type {
+	FEC_TXBUF_T_SKB,
+	FEC_TXBUF_T_XDP_NDO,
+};
+
+struct fec_tx_buffer {
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdp;
+	};
+	enum fec_txbuf_type type;
+};
+
 struct fec_enet_priv_tx_q {
 	struct bufdesc_prop bd;
 	unsigned char *tx_bounce[TX_RING_SIZE];
-	struct  sk_buff *tx_skbuff[TX_RING_SIZE];
+	struct fec_tx_buffer tx_buf[TX_RING_SIZE];
 
 	unsigned short tx_stop_threshold;
 	unsigned short tx_wake_threshold;
drivers/net/ethernet/freescale/fec_main.c  +101 −47
@@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev)
 			fec16_to_cpu(bdp->cbd_sc),
 			fec32_to_cpu(bdp->cbd_bufaddr),
 			fec16_to_cpu(bdp->cbd_datlen),
-			txq->tx_skbuff[index]);
+			txq->tx_buf[index].skb);
 		bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
 		index++;
 	} while (bdp != txq->bd.base);
@@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,

 	index = fec_enet_get_bd_index(last_bdp, &txq->bd);
 	/* Save skb pointer */
-	txq->tx_skbuff[index] = skb;
+	txq->tx_buf[index].skb = skb;
 
 	/* Make sure the updates to rest of the descriptor are performed before
 	 * transferring ownership.
@@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,

 	skb_tx_timestamp(skb);
 
-	/* Make sure the update to bdp and tx_skbuff are performed before
-	 * txq->bd.cur.
-	 */
+	/* Make sure the update to bdp is performed before txq->bd.cur. */
 	wmb();
 	txq->bd.cur = bdp;
 
@@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
 	}
 
 	/* Save skb pointer */
-	txq->tx_skbuff[index] = skb;
+	txq->tx_buf[index].skb = skb;
 
 	skb_tx_timestamp(skb);
 	txq->bd.cur = bdp;
@@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev)
 		for (i = 0; i < txq->bd.ring_size; i++) {
 			/* Initialize the BD for every fragment in the page. */
 			bdp->cbd_sc = cpu_to_fec16(0);
-			if (bdp->cbd_bufaddr &&
-			    !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
-				dma_unmap_single(&fep->pdev->dev,
-						 fec32_to_cpu(bdp->cbd_bufaddr),
-						 fec16_to_cpu(bdp->cbd_datlen),
-						 DMA_TO_DEVICE);
-			if (txq->tx_skbuff[i]) {
-				dev_kfree_skb_any(txq->tx_skbuff[i]);
-				txq->tx_skbuff[i] = NULL;
+			if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
+				if (bdp->cbd_bufaddr &&
+				    !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+					dma_unmap_single(&fep->pdev->dev,
+							 fec32_to_cpu(bdp->cbd_bufaddr),
+							 fec16_to_cpu(bdp->cbd_datlen),
+							 DMA_TO_DEVICE);
+				if (txq->tx_buf[i].skb) {
+					dev_kfree_skb_any(txq->tx_buf[i].skb);
+					txq->tx_buf[i].skb = NULL;
+				}
+			} else {
+				if (bdp->cbd_bufaddr)
+					dma_unmap_single(&fep->pdev->dev,
+							 fec32_to_cpu(bdp->cbd_bufaddr),
+							 fec16_to_cpu(bdp->cbd_datlen),
+							 DMA_TO_DEVICE);
+
+				if (txq->tx_buf[i].xdp) {
+					xdp_return_frame(txq->tx_buf[i].xdp);
+					txq->tx_buf[i].xdp = NULL;
+				}
+
+				/* restore default tx buffer type: FEC_TXBUF_T_SKB */
+				txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
 			}
+
 			bdp->cbd_bufaddr = cpu_to_fec32(0);
 			bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
 		}
@@ -1360,6 +1375,7 @@ static void
 fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 {
 	struct	fec_enet_private *fep;
+	struct xdp_frame *xdpf;
 	struct bufdesc *bdp;
 	unsigned short status;
 	struct	sk_buff	*skb;
@@ -1387,16 +1403,31 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)

 		index = fec_enet_get_bd_index(bdp, &txq->bd);
 
-		skb = txq->tx_skbuff[index];
-		txq->tx_skbuff[index] = NULL;
-		if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
-			dma_unmap_single(&fep->pdev->dev,
-					 fec32_to_cpu(bdp->cbd_bufaddr),
-					 fec16_to_cpu(bdp->cbd_datlen),
-					 DMA_TO_DEVICE);
-		bdp->cbd_bufaddr = cpu_to_fec32(0);
-		if (!skb)
-			goto skb_done;
+		if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
+			skb = txq->tx_buf[index].skb;
+			txq->tx_buf[index].skb = NULL;
+			if (bdp->cbd_bufaddr &&
+			    !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+				dma_unmap_single(&fep->pdev->dev,
+						 fec32_to_cpu(bdp->cbd_bufaddr),
+						 fec16_to_cpu(bdp->cbd_datlen),
+						 DMA_TO_DEVICE);
+			bdp->cbd_bufaddr = cpu_to_fec32(0);
+			if (!skb)
+				goto tx_buf_done;
+		} else {
+			xdpf = txq->tx_buf[index].xdp;
+			if (bdp->cbd_bufaddr)
+				dma_unmap_single(&fep->pdev->dev,
+						 fec32_to_cpu(bdp->cbd_bufaddr),
+						 fec16_to_cpu(bdp->cbd_datlen),
+						 DMA_TO_DEVICE);
+			bdp->cbd_bufaddr = cpu_to_fec32(0);
+			if (!xdpf) {
+				txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
+				goto tx_buf_done;
+			}
+		}
 
 		/* Check for errors. */
 		if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -1415,16 +1446,26 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 				ndev->stats.tx_carrier_errors++;
 		} else {
 			ndev->stats.tx_packets++;
-			ndev->stats.tx_bytes += skb->len;
-		}
-
-		/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
-		 * are to time stamp the packet, so we still need to check time
-		 * stamping enabled flag.
-		 */
-		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
-			     fep->hwts_tx_en) &&
-		    fep->bufdesc_ex) {
-			struct skb_shared_hwtstamps shhwtstamps;
-			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+			if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB)
+				ndev->stats.tx_bytes += skb->len;
+			else
+				ndev->stats.tx_bytes += xdpf->len;
+		}
+
+		/* Deferred means some collisions occurred during transmit,
+		 * but we eventually sent the packet OK.
+		 */
+		if (status & BD_ENET_TX_DEF)
+			ndev->stats.collisions++;
+
+		if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
+			/* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
+			 * are to time stamp the packet, so we still need to check time
+			 * stamping enabled flag.
+			 */
+			if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+				     fep->hwts_tx_en) && fep->bufdesc_ex) {
+				struct skb_shared_hwtstamps shhwtstamps;
+				struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
 
@@ -1432,16 +1473,18 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
-			skb_tstamp_tx(skb, &shhwtstamps);
-		}
-
-		/* Deferred means some collisions occurred during transmit,
-		 * but we eventually sent the packet OK.
-		 */
-		if (status & BD_ENET_TX_DEF)
-			ndev->stats.collisions++;
-
-		/* Free the sk buffer associated with this last transmit */
-		dev_kfree_skb_any(skb);
-skb_done:
-		/* Make sure the update to bdp and tx_skbuff are performed
+				skb_tstamp_tx(skb, &shhwtstamps);
+			}
+
+			/* Free the sk buffer associated with this last transmit */
+			dev_kfree_skb_any(skb);
+		} else {
+			xdp_return_frame(xdpf);
+
+			txq->tx_buf[index].xdp = NULL;
+			/* restore default tx buffer type: FEC_TXBUF_T_SKB */
+			txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
+		}
+
+tx_buf_done:
+		/* Make sure the update to bdp and tx_buf are performed
 		 * before dirty_tx
 		 */
 		wmb();
@@ -3249,9 +3292,19 @@ static void fec_enet_free_buffers(struct net_device *ndev)
 		for (i = 0; i < txq->bd.ring_size; i++) {
 			kfree(txq->tx_bounce[i]);
 			txq->tx_bounce[i] = NULL;
-			skb = txq->tx_skbuff[i];
-			txq->tx_skbuff[i] = NULL;
-			dev_kfree_skb(skb);
+
+			if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
+				skb = txq->tx_buf[i].skb;
+				txq->tx_buf[i].skb = NULL;
+				dev_kfree_skb(skb);
+			} else {
+				if (txq->tx_buf[i].xdp) {
+					xdp_return_frame(txq->tx_buf[i].xdp);
+					txq->tx_buf[i].xdp = NULL;
+				}
+
+				txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
+			}
 		}
 	}
 }
@@ -3817,7 +3870,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 		ebdp->cbd_esc = cpu_to_fec32(estatus);
 	}
 
-	txq->tx_skbuff[index] = NULL;
+	txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
+	txq->tx_buf[index].xdp = frame;
 
 	/* Make sure the updates to rest of the descriptor are performed before
 	 * transferring ownership.