Commit cd275c23 authored by Gerhard Engleder's avatar Gerhard Engleder Committed by Jakub Kicinski
Browse files

tsnep: Add XDP socket zero-copy TX support



Send and complete XSK pool frames within TX NAPI context. NAPI context
is triggered by ndo_xsk_wakeup.

Test results with A53 1.2GHz:

xdpsock txonly copy mode, 64 byte frames:
                   pps            pkts           1.00
tx                 284,409        11,398,144
Two CPUs with 100% and 10% utilization.

xdpsock txonly zero-copy mode, 64 byte frames:
                   pps            pkts           1.00
tx                 511,929        5,890,368
Two CPUs with 100% and 1% utilization.

xdpsock l2fwd copy mode, 64 byte frames:
                   pps            pkts           1.00
rx                 248,985        7,315,885
tx                 248,921        7,315,885
Two CPUs with 100% and 10% utilization.

xdpsock l2fwd zero-copy mode, 64 byte frames:
                   pps            pkts           1.00
rx                 254,735        3,039,456
tx                 254,735        3,039,456
Two CPUs with 100% and 4% utilization.

Packet rate increases and CPU utilization is reduced in both cases.

Signed-off-by: default avatarGerhard Engleder <gerhard@engleder-embedded.com>
Reviewed-by: default avatarMaciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 3fc23339
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ struct tsnep_tx_entry {
	union {
		struct sk_buff *skb;
		struct xdp_frame *xdpf;
		bool zc;
	};
	size_t len;
	DEFINE_DMA_UNMAP_ADDR(dma);
@@ -88,6 +89,7 @@ struct tsnep_tx {
	int read;
	u32 owner_counter;
	int increment_owner_counter;
	struct xsk_buff_pool *xsk_pool;

	u32 packets;
	u32 bytes;
+119 −11
Original line number Diff line number Diff line
@@ -54,6 +54,8 @@
#define TSNEP_TX_TYPE_SKB_FRAG	BIT(1)
#define TSNEP_TX_TYPE_XDP_TX	BIT(2)
#define TSNEP_TX_TYPE_XDP_NDO	BIT(3)
#define TSNEP_TX_TYPE_XDP	(TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO)
#define TSNEP_TX_TYPE_XSK	BIT(4)

#define TSNEP_XDP_TX		BIT(0)
#define TSNEP_XDP_REDIRECT	BIT(1)
@@ -322,13 +324,47 @@ static void tsnep_tx_init(struct tsnep_tx *tx)
	tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
}

static void tsnep_tx_enable(struct tsnep_tx *tx)
{
	struct netdev_queue *nq;

	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);

	__netif_tx_lock_bh(nq);
	netif_tx_wake_queue(nq);
	__netif_tx_unlock_bh(nq);
}

static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
{
	struct netdev_queue *nq;
	u32 val;

	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);

	__netif_tx_lock_bh(nq);
	netif_tx_stop_queue(nq);
	__netif_tx_unlock_bh(nq);

	/* wait until TX is done in hardware */
	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
			   1000000);

	/* wait until TX is also done in software */
	while (READ_ONCE(tx->read) != tx->write) {
		napi_schedule(napi);
		napi_synchronize(napi);
	}
}

static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
			      bool last)
{
	struct tsnep_tx_entry *entry = &tx->entry[index];

	entry->properties = 0;
	/* xdpf is union with skb */
	/* xdpf and zc are union with skb */
	if (entry->skb) {
		entry->properties = length & TSNEP_DESC_LENGTH_MASK;
		entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
@@ -646,10 +682,69 @@ static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
	return xmit;
}

static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
{
	struct tsnep_tx_entry *entry;
	dma_addr_t dma;

	entry = &tx->entry[tx->write];
	entry->zc = true;

	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);

	entry->type = TSNEP_TX_TYPE_XSK;
	entry->len = xdpd->len;

	entry->desc->tx = __cpu_to_le64(dma);

	return xdpd->len;
}

static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
					 struct tsnep_tx *tx)
{
	int length;

	length = tsnep_xdp_tx_map_zc(xdpd, tx);

	tsnep_tx_activate(tx, tx->write, length, true);
	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
}

static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
{
	int desc_available = tsnep_tx_desc_available(tx);
	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
	int batch, i;

	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
	 * will be available for normal TX path and queue is stopped there if
	 * necessary
	 */
	if (desc_available <= (MAX_SKB_FRAGS + 1))
		return;
	desc_available -= MAX_SKB_FRAGS + 1;

	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
	for (i = 0; i < batch; i++)
		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);

	if (batch) {
		/* descriptor properties shall be valid before hardware is
		 * notified
		 */
		dma_wmb();

		tsnep_xdp_xmit_flush(tx);
	}
}

static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
	struct tsnep_tx_entry *entry;
	struct netdev_queue *nq;
	int xsk_frames = 0;
	int budget = 128;
	int length;
	int count;
@@ -676,7 +771,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
		    skb_shinfo(entry->skb)->nr_frags > 0)
			count += skb_shinfo(entry->skb)->nr_frags;
		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
			 xdp_frame_has_frags(entry->xdpf))
			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;

@@ -705,9 +800,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)

		if (entry->type & TSNEP_TX_TYPE_SKB)
			napi_consume_skb(entry->skb, napi_budget);
		else
		else if (entry->type & TSNEP_TX_TYPE_XDP)
			xdp_return_frame_rx_napi(entry->xdpf);
		/* xdpf is union with skb */
		else
			xsk_frames++;
		/* xdpf and zc are union with skb */
		entry->skb = NULL;

		tx->read = (tx->read + count) & TSNEP_RING_MASK;
@@ -718,6 +815,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
		budget--;
	} while (likely(budget));

	if (tx->xsk_pool) {
		if (xsk_frames)
			xsk_tx_completed(tx->xsk_pool, xsk_frames);
		if (xsk_uses_need_wakeup(tx->xsk_pool))
			xsk_set_tx_need_wakeup(tx->xsk_pool);
		tsnep_xdp_xmit_zc(tx);
	}

	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
	    netif_tx_queue_stopped(nq)) {
		netif_tx_wake_queue(nq);
@@ -765,12 +870,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx)

static void tsnep_tx_close(struct tsnep_tx *tx)
{
	u32 val;

	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
			   1000000);

	tsnep_tx_ring_cleanup(tx);
}

@@ -1786,12 +1885,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue)
	napi_enable(&queue->napi);
	tsnep_enable_irq(queue->adapter, queue->irq_mask);

	if (queue->tx)
		tsnep_tx_enable(queue->tx);

	if (queue->rx)
		tsnep_rx_enable(queue->rx);
}

static void tsnep_queue_disable(struct tsnep_queue *queue)
{
	if (queue->tx)
		tsnep_tx_disable(queue->tx, &queue->napi);

	napi_disable(&queue->napi);
	tsnep_disable_irq(queue->adapter, queue->irq_mask);

@@ -1908,6 +2013,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
	if (running)
		tsnep_queue_disable(queue);

	queue->tx->xsk_pool = pool;
	queue->rx->xsk_pool = pool;

	if (running) {
@@ -1928,6 +2034,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue)
	tsnep_rx_free_zc(queue->rx);

	queue->rx->xsk_pool = NULL;
	queue->tx->xsk_pool = NULL;

	if (running) {
		tsnep_rx_reopen(queue->rx);
@@ -2438,7 +2545,8 @@ static int tsnep_probe(struct platform_device *pdev)

	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
			       NETDEV_XDP_ACT_NDO_XMIT |
			       NETDEV_XDP_ACT_NDO_XMIT_SG;
			       NETDEV_XDP_ACT_NDO_XMIT_SG |
			       NETDEV_XDP_ACT_XSK_ZEROCOPY;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);