Commit fc9df2a0 authored by Andre Guedes, committed by Tony Nguyen

igc: Enable RX via AF_XDP zero-copy

Add support for receiving packets via the AF_XDP zero-copy mechanism.

Add a new flag to 'enum igc_ring_flags_t' to indicate the ring has
AF_XDP zero-copy enabled so proper ring setup is carried out during ring
configuration in igc_configure_rx_ring().

RX buffers can now be allocated either via the shared pages mechanism (the
driver's default behavior) or from an XSK pool (when AF_XDP zero-copy is
enabled), so a union is added to 'struct igc_rx_buffer' to cover both
cases.

When AF_XDP zero-copy is enabled, RX buffers are allocated from the XSK
pool using the new helper igc_alloc_rx_buffers_zc(), the counterpart of
igc_alloc_rx_buffers().

Like other Intel drivers that support AF_XDP zero-copy, igc gets a
dedicated path for cleaning up RX IRQs when zero-copy is enabled. This
avoids sprinkling checks throughout igc_clean_rx_irq(), keeping the code
more readable and efficient since that function sits in the driver's hot
path.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent 859b4dfa
+16 −6
@@ -118,6 +118,7 @@ struct igc_ring {
	};

	struct xdp_rxq_info xdp_rxq;
	struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;

/* Board specific private data structure */
@@ -255,6 +256,9 @@ bool igc_has_link(struct igc_adapter *adapter);
void igc_reset(struct igc_adapter *adapter);
int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
void igc_update_stats(struct igc_adapter *adapter);
void igc_disable_rx_ring(struct igc_ring *ring);
void igc_enable_rx_ring(struct igc_ring *ring);
int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);

/* igc_dump declarations */
void igc_rings_dump(struct igc_adapter *adapter);
@@ -432,6 +436,8 @@ struct igc_tx_buffer {
};

struct igc_rx_buffer {
	union {
		struct {
			dma_addr_t dma;
			struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
@@ -441,6 +447,9 @@ struct igc_rx_buffer {
#endif
			__u16 pagecnt_bias;
		};
		struct xdp_buff *xdp;
	};
};

struct igc_q_vector {
	struct igc_adapter *adapter;    /* backlink */
@@ -525,7 +534,8 @@ enum igc_ring_flags_t {
	IGC_RING_FLAG_RX_SCTP_CSUM,
	IGC_RING_FLAG_RX_LB_VLAN_BSWAP,
	IGC_RING_FLAG_TX_CTX_IDX,
	IGC_RING_FLAG_TX_DETECT_HANG,
	IGC_RING_FLAG_AF_XDP_ZC,
};

#define ring_uses_large_buffer(ring) \
+1 −0
@@ -81,6 +81,7 @@ union igc_adv_rx_desc {

/* Additional Receive Descriptor Control definitions */
#define IGC_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
#define IGC_RXDCTL_SWFLUSH		0x04000000 /* Receive Software Flush */

/* SRRCTL bit definitions */
#define IGC_SRRCTL_BSIZEPKT_SHIFT		10 /* Shift _right_ */
+331 −13
@@ -11,7 +11,7 @@
#include <linux/pm_runtime.h>
#include <net/pkt_sched.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/ipv6.h>

#include "igc.h"
@@ -389,12 +389,30 @@ static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
	}
}

static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
{
	struct igc_rx_buffer *bi;
	u16 i;

	for (i = 0; i < ring->count; i++) {
		bi = &ring->rx_buffer_info[i];
		if (!bi->xdp)
			continue;

		xsk_buff_free(bi->xdp);
		bi->xdp = NULL;
	}
}

/**
 * igc_clean_rx_ring - Free Rx Buffers per Queue
 * @ring: ring to free buffers from
 */
static void igc_clean_rx_ring(struct igc_ring *ring)
{
	if (ring->xsk_pool)
		igc_clean_rx_ring_xsk_pool(ring);
	else
		igc_clean_rx_ring_page_shared(ring);

	clear_ring_uses_large_buffer(ring);
@@ -533,6 +551,16 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
	return err;
}

static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
					      struct igc_ring *ring)
{
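	/* Zero-copy is per-ring: hand back the queue's pool only when an
	 * XDP program is loaded and IGC_RING_FLAG_AF_XDP_ZC is set.
	 */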
	if (!igc_xdp_is_enabled(adapter) ||
	    !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
		return NULL;

	return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
}

/**
 * igc_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
@@ -548,9 +576,20 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
	int reg_idx = ring->reg_idx;
	u32 srrctl = 0, rxdctl = 0;
	u64 rdba = ring->dma;
	u32 buf_size;

	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
	if (ring->xsk_pool) {
		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						   MEM_TYPE_XSK_BUFF_POOL,
						   NULL));
		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
	} else {
		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						   MEM_TYPE_PAGE_SHARED,
						   NULL));
	}

	if (igc_xdp_is_enabled(adapter))
		set_ring_uses_large_buffer(ring);
@@ -574,12 +613,15 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
	ring->next_to_clean = 0;
	ring->next_to_use = 0;

	/* set descriptor configuration */
	if (ring->xsk_pool)
		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
	else if (ring_uses_large_buffer(ring))
		buf_size = IGC_RXBUFFER_3072;
	else
		buf_size = IGC_RXBUFFER_2048;

	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
	srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;

	wr32(IGC_SRRCTL(reg_idx), srrctl);
@@ -1939,6 +1981,63 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
	}
}

static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
{
	union igc_adv_rx_desc *desc;
	u16 i = ring->next_to_use;
	struct igc_rx_buffer *bi;
	dma_addr_t dma;
	bool ok = true;

	if (!count)
		return ok;

	desc = IGC_RX_DESC(ring, i);
	bi = &ring->rx_buffer_info[i];
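	/* Bias the index by -ring->count so the wrap test below can check
	 * for zero instead of comparing against ring->count on each pass.
	 */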
	i -= ring->count;

	do {
		bi->xdp = xsk_buff_alloc(ring->xsk_pool);
		if (!bi->xdp) {
			ok = false;
			break;
		}

		dma = xsk_buff_xdp_get_dma(bi->xdp);
		desc->read.pkt_addr = cpu_to_le64(dma);

		desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			desc = IGC_RX_DESC(ring, 0);
			bi = ring->rx_buffer_info;
			i -= ring->count;
		}

		/* Clear the length for the next_to_use descriptor. */
		desc->wb.upper.length = 0;

		count--;
	} while (count);

	i += ring->count;

	if (ring->next_to_use != i) {
		ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(i, ring->tail);
	}

	return ok;
}

static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
				  struct xdp_frame *xdpf,
				  struct igc_ring *ring)
@@ -2254,6 +2353,148 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
	return total_packets;
}

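/* On XDP_PASS the frame is copied out of the UMEM buffer into a freshly
 * allocated skb, because igc_clean_rx_irq_zc() returns the buffer to the
 * pool (xsk_buff_free()) right after dispatching it.
 */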
static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
					    struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	unsigned int datasize = xdp->data_end - xdp->data;
	unsigned int totalsize = metasize + datasize;
	struct sk_buff *skb;

	skb = __napi_alloc_skb(&ring->q_vector->napi,
			       xdp->data_end - xdp->data_hard_start,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
	memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;
}

static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
				union igc_adv_rx_desc *desc,
				struct xdp_buff *xdp,
				ktime_t timestamp)
{
	struct igc_ring *ring = q_vector->rx.ring;
	struct sk_buff *skb;

	skb = igc_construct_skb_zc(ring, xdp);
	if (!skb) {
		ring->rx_stats.alloc_failed++;
		return;
	}

	if (timestamp)
		skb_hwtstamps(skb)->hwtstamp = timestamp;

	if (igc_cleanup_headers(ring, desc, skb))
		return;

	igc_process_skb_fields(ring, desc, skb);
	napi_gro_receive(&q_vector->napi, skb);
}

static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
{
	struct igc_adapter *adapter = q_vector->adapter;
	struct igc_ring *ring = q_vector->rx.ring;
	u16 cleaned_count = igc_desc_unused(ring);
	int total_bytes = 0, total_packets = 0;
	u16 ntc = ring->next_to_clean;
	struct bpf_prog *prog;
	bool failure = false;
	int xdp_status = 0;

	rcu_read_lock();

	prog = READ_ONCE(adapter->xdp_prog);

	while (likely(total_packets < budget)) {
		union igc_adv_rx_desc *desc;
		struct igc_rx_buffer *bi;
		ktime_t timestamp = 0;
		unsigned int size;
		int res;

		desc = IGC_RX_DESC(ring, ntc);
		size = le16_to_cpu(desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = &ring->rx_buffer_info[ntc];

		if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
							bi->xdp->data);

			bi->xdp->data += IGC_TS_HDR_LEN;

			/* HW timestamp has been copied into local variable. Metadata
			 * length when XDP program is called should be 0.
			 */
			bi->xdp->data_meta += IGC_TS_HDR_LEN;
			size -= IGC_TS_HDR_LEN;
		}

		bi->xdp->data_end = bi->xdp->data + size;
		xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);

		res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
		switch (res) {
		case IGC_XDP_PASS:
			igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
			fallthrough;
		case IGC_XDP_CONSUMED:
			xsk_buff_free(bi->xdp);
			break;
		case IGC_XDP_TX:
		case IGC_XDP_REDIRECT:
			xdp_status |= res;
			break;
		}

		bi->xdp = NULL;
		total_bytes += size;
		total_packets++;
		cleaned_count++;
		ntc++;
		if (ntc == ring->count)
			ntc = 0;
	}

	ring->next_to_clean = ntc;
	rcu_read_unlock();

	if (cleaned_count >= IGC_RX_BUFFER_WRITE)
		failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);

	if (xdp_status)
		igc_finalize_xdp(adapter, xdp_status);

	igc_update_rx_stats(q_vector, total_packets, total_bytes);

	if (xsk_uses_need_wakeup(ring->xsk_pool)) {
		if (failure || ring->next_to_clean == ring->next_to_use)
			xsk_set_rx_need_wakeup(ring->xsk_pool);
		else
			xsk_clear_rx_need_wakeup(ring->xsk_pool);
		return total_packets;
	}

	return failure ? budget : total_packets;
}

static void igc_update_tx_stats(struct igc_q_vector *q_vector,
				unsigned int packets, unsigned int bytes)
{
@@ -2946,6 +3187,9 @@ static void igc_configure(struct igc_adapter *adapter)
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igc_ring *ring = adapter->rx_ring[i];

		if (ring->xsk_pool)
			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
		else
			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
	}
}
@@ -3561,14 +3805,17 @@ static int igc_poll(struct napi_struct *napi, int budget)
	struct igc_q_vector *q_vector = container_of(napi,
						     struct igc_q_vector,
						     napi);
	struct igc_ring *rx_ring = q_vector->rx.ring;
	bool clean_complete = true;
	int work_done = 0;

	if (q_vector->tx.ring)
		clean_complete = igc_clean_tx_irq(q_vector, budget);

	if (rx_ring) {
		int cleaned = rx_ring->xsk_pool ?
			      igc_clean_rx_irq_zc(q_vector, budget) :
			      igc_clean_rx_irq(q_vector, budget);

		work_done += cleaned;
		if (cleaned >= budget)
@@ -5206,6 +5453,9 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
	switch (bpf->command) {
	case XDP_SETUP_PROG:
		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
	case XDP_SETUP_XSK_POOL:
		return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
					  bpf->xsk.queue_id);
	default:
		return -EOPNOTSUPP;
	}
@@ -5251,6 +5501,43 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
	return num_frames - drops;
}

static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
					struct igc_q_vector *q_vector)
{
	struct igc_hw *hw = &adapter->hw;
	u32 eics = 0;

	eics |= q_vector->eims_value;
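	/* Writing the vector's EIMS bit to EICS raises a software-generated
	 * interrupt, which gets NAPI scheduled again for this queue pair.
	 */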
	wr32(IGC_EICS, eics);
}

int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct igc_adapter *adapter = netdev_priv(dev);
	struct igc_q_vector *q_vector;
	struct igc_ring *ring;

	if (test_bit(__IGC_DOWN, &adapter->state))
		return -ENETDOWN;

	if (!igc_xdp_is_enabled(adapter))
		return -ENXIO;

	if (queue_id >= adapter->num_rx_queues)
		return -EINVAL;

	ring = adapter->rx_ring[queue_id];

	if (!ring->xsk_pool)
		return -ENXIO;

	q_vector = adapter->q_vector[queue_id];
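	/* If NAPI is already scheduled, just mark the run as missed so it
	 * re-polls; otherwise kick the queue with a software interrupt.
	 */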
	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
		igc_trigger_rxtxq_interrupt(adapter, q_vector);

	return 0;
}

static const struct net_device_ops igc_netdev_ops = {
	.ndo_open		= igc_open,
	.ndo_stop		= igc_close,
@@ -5266,6 +5553,7 @@ static const struct net_device_ops igc_netdev_ops = {
	.ndo_setup_tc		= igc_setup_tc,
	.ndo_bpf		= igc_bpf,
	.ndo_xdp_xmit		= igc_xdp_xmit,
	.ndo_xsk_wakeup		= igc_xsk_wakeup,
};

/* PCIe configuration access */
@@ -6018,6 +6306,36 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw)
	return adapter->netdev;
}

static void igc_disable_rx_ring_hw(struct igc_ring *ring)
{
	struct igc_hw *hw = &ring->q_vector->adapter->hw;
	u8 idx = ring->reg_idx;
	u32 rxdctl;

	rxdctl = rd32(IGC_RXDCTL(idx));
	rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
	rxdctl |= IGC_RXDCTL_SWFLUSH;
	wr32(IGC_RXDCTL(idx), rxdctl);
}

void igc_disable_rx_ring(struct igc_ring *ring)
{
	igc_disable_rx_ring_hw(ring);
	igc_clean_rx_ring(ring);
}

void igc_enable_rx_ring(struct igc_ring *ring)
{
	struct igc_adapter *adapter = ring->q_vector->adapter;

	igc_configure_rx_ring(adapter, ring);

	if (ring->xsk_pool)
		igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
	else
		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
}

/**
 * igc_init_module - Driver Registration Routine
 *
+100 −0
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020, Intel Corporation. */

#include <net/xdp_sock_drv.h>

#include "igc.h"
#include "igc_xdp.h"

@@ -31,3 +33,101 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,

	return 0;
}

static int igc_xdp_enable_pool(struct igc_adapter *adapter,
			       struct xsk_buff_pool *pool, u16 queue_id)
{
	struct net_device *ndev = adapter->netdev;
	struct device *dev = &adapter->pdev->dev;
	struct igc_ring *rx_ring;
	struct napi_struct *napi;
	bool needs_reset;
	u32 frame_size;
	int err;

	if (queue_id >= adapter->num_rx_queues)
		return -EINVAL;

	frame_size = xsk_pool_get_rx_frame_size(pool);
	if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) {
		/* When XDP is enabled, the driver doesn't support frames that
		 * span over multiple buffers. To avoid that, we check if xsk
		 * frame size is big enough to fit the max ethernet frame size
		 * + vlan double tagging.
		 */
		return -EOPNOTSUPP;
	}

	err = xsk_pool_dma_map(pool, dev, IGC_RX_DMA_ATTR);
	if (err) {
		netdev_err(ndev, "Failed to map xsk pool\n");
		return err;
	}

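	/* Tear down and rebuild the ring only when traffic is already
	 * flowing through XDP; igc_configure_rx_ring() then picks up the
	 * new pool via igc_get_xsk_pool().
	 */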
	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);

	rx_ring = adapter->rx_ring[queue_id];
	napi = &rx_ring->q_vector->napi;

	if (needs_reset) {
		igc_disable_rx_ring(rx_ring);
		napi_disable(napi);
	}

	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);

	if (needs_reset) {
		napi_enable(napi);
		igc_enable_rx_ring(rx_ring);

		err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX);
		if (err) {
			xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
			return err;
		}
	}

	return 0;
}

static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
{
	struct xsk_buff_pool *pool;
	struct igc_ring *rx_ring;
	struct napi_struct *napi;
	bool needs_reset;

	if (queue_id >= adapter->num_rx_queues)
		return -EINVAL;

	pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
	if (!pool)
		return -EINVAL;

	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);

	rx_ring = adapter->rx_ring[queue_id];
	napi = &rx_ring->q_vector->napi;

	if (needs_reset) {
		igc_disable_rx_ring(rx_ring);
		napi_disable(napi);
	}

	xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);

	if (needs_reset) {
		napi_enable(napi);
		igc_enable_rx_ring(rx_ring);
	}

	return 0;
}

int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
		       u16 queue_id)
{
	return pool ? igc_xdp_enable_pool(adapter, pool, queue_id) :
		      igc_xdp_disable_pool(adapter, queue_id);
}
+2 −0
@@ -6,6 +6,8 @@

int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
		     struct netlink_ext_ack *extack);
int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
		       u16 queue_id);

static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter)
{