Commit 7952d7ed authored by David S. Miller


Tony Nguyen says:

====================
40GbE Intel Wired LAN Driver Updates 2020-09-14

This series contains updates to i40e driver only.

Li RongQing stops binding the affinity mask to a fixed CPU and makes the
prefetch of the Rx buffer page conditional (see the prefetch sketch below).

Björn provides AF_XDP performance improvements by not prefetching HW
descriptors, using 16-byte descriptors, and moving buffer allocation out
of the Rx processing loop (see the descriptor layout sketch below).

v2: Define prefetch_page_address in a common header for patch 2.
Dropped the previous patch 5 as it is being reworked to be more
generalized.
====================
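
The conditional Rx page prefetch mentioned above appears in the
i40e_txrx.c hunk below, where prefetchw(rx_buffer->page) becomes
prefetch_page_address(rx_buffer->page). As a rough illustration only,
such a helper can be compiled out unless page_address() actually reads
the virtual address from struct page; the guard below is a sketch from
memory of include/linux/prefetch.h and is not part of this diff:

struct page;

/* Sketch: prefetch the struct page only on configs where page_address()
 * dereferences it; otherwise the prefetch gains nothing, so make it a no-op.
 */
#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL)
#define prefetch_page_address(page)	prefetch(page)
#else
#define prefetch_page_address(page)
#endif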

Signed-off-by: David S. Miller <davem@davemloft.net>
parents e0d9ae69 8cbf7414
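
The descriptor-size change drives most of the hunks below: the debugfs
and trace code drop the rsvd1/rsvd2 words, i40e_configure_rx_ring flips
rx_ctx.dsize from 1 to 0 to ask the hardware for the 16-byte format, and
the ring is now sized with sizeof(union i40e_rx_desc). For orientation, a
simplified sketch of the two read formats (paraphrased from the driver's
i40e_type.h definitions, writeback formats omitted; not part of this diff):

union i40e_16byte_rx_desc {
	struct {
		__le64 pkt_addr;	/* packet buffer address */
		__le64 hdr_addr;	/* header buffer address */
	} read;
	/* writeback format omitted */
};

union i40e_32byte_rx_desc {
	struct {
		__le64 pkt_addr;
		__le64 hdr_addr;
		__le64 rsvd1;
		__le64 rsvd2;
	} read;
	/* writeback format omitted */
};

With 16-byte descriptors, four descriptors fit in one 64-byte cacheline
instead of two, which is part of the AF_XDP performance win described in
the cover letter.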
+1 −1
@@ -90,7 +90,7 @@
#define I40E_OEM_RELEASE_MASK		0x0000ffff

#define I40E_RX_DESC(R, i)	\
-	(&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
+	(&(((union i40e_rx_desc *)((R)->desc))[i]))
#define I40E_TX_DESC(R, i)	\
	(&(((struct i40e_tx_desc *)((R)->desc))[i]))
#define I40E_TX_CTXTDESC(R, i)	\
+4 −6
@@ -604,10 +604,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
			} else {
				rxd = I40E_RX_DESC(ring, i);
				dev_info(&pf->pdev->dev,
					 "   d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
					 "   d[%03x] = 0x%016llx 0x%016llx\n",
					 i, rxd->read.pkt_addr,
					 rxd->read.hdr_addr,
					 rxd->read.rsvd1, rxd->read.rsvd2);
					 rxd->read.hdr_addr);
			}
		}
	} else if (cnt == 3) {
@@ -625,10 +624,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
		} else {
			rxd = I40E_RX_DESC(ring, desc_n);
			dev_info(&pf->pdev->dev,
				 "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
				 "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
				 vsi_seid, ring_id, desc_n,
				 rxd->read.pkt_addr, rxd->read.hdr_addr,
				 rxd->read.rsvd1, rxd->read.rsvd2);
				 rxd->read.pkt_addr, rxd->read.hdr_addr);
		}
	} else {
		dev_info(&pf->pdev->dev, "dump desc rx/tx/xdp <vsi_seid> <ring_id> [<desc_n>]\n");
+5 −11
@@ -3321,8 +3321,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
	rx_ctx.base = (ring->dma / 128);
	rx_ctx.qlen = ring->count;

-	/* use 32 byte descriptors */
-	rx_ctx.dsize = 1;
+	/* use 16 byte descriptors */
+	rx_ctx.dsize = 0;

	/* descriptor type is always zero
	 * rx_ctx.dtype = 0;
@@ -11186,11 +11186,10 @@ static int i40e_init_msix(struct i40e_pf *pf)
 * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
 * @vsi: the VSI being configured
 * @v_idx: index of the vector in the vsi struct
- * @cpu: cpu to be used on affinity_mask
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
-static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
{
	struct i40e_q_vector *q_vector;

@@ -11223,7 +11222,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
-	int err, v_idx, num_q_vectors, current_cpu;
+	int err, v_idx, num_q_vectors;

	/* if not MSIX, give the one vector only to the LAN VSI */
	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -11233,15 +11232,10 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
	else
		return -EINVAL;

-	current_cpu = cpumask_first(cpu_online_mask);
-
	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu);
+		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
		if (err)
			goto err_out;
-		current_cpu = cpumask_next(current_cpu, cpu_online_mask);
-		if (unlikely(current_cpu >= nr_cpu_ids))
-			current_cpu = cpumask_first(cpu_online_mask);
	}

	return 0;
+3 −3
@@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(
	i40e_rx_template,

	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+		 union i40e_16byte_rx_desc *desc,
		 struct sk_buff *skb),

	TP_ARGS(ring, desc, skb),
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(
DEFINE_EVENT(
	i40e_rx_template, i40e_clean_rx_irq,
	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+		 union i40e_16byte_rx_desc *desc,
		 struct sk_buff *skb),

	TP_ARGS(ring, desc, skb));
@@ -148,7 +148,7 @@ DEFINE_EVENT(
DEFINE_EVENT(
	i40e_rx_template, i40e_clean_rx_irq_rx,
	TP_PROTO(struct i40e_ring *ring,
-		 union i40e_32byte_rx_desc *desc,
+		 union i40e_16byte_rx_desc *desc,
		 struct sk_buff *skb),

	TP_ARGS(ring, desc, skb));
+17 −4
@@ -533,11 +533,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
{
	struct i40e_pf *pf = rx_ring->vsi->back;
	struct pci_dev *pdev = pf->pdev;
-	struct i40e_32b_rx_wb_qw0 *qw0;
+	struct i40e_16b_rx_wb_qw0 *qw0;
	u32 fcnt_prog, fcnt_avail;
	u32 error;

-	qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
+	qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
	error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;

@@ -1418,7 +1418,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
	u64_stats_init(&rx_ring->syncp);

	/* Round up to nearest 4K */
-	rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+	rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc);
	rx_ring->size = ALIGN(rx_ring->size, 4096);
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);
@@ -1953,7 +1953,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
	struct i40e_rx_buffer *rx_buffer;

	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
-	prefetchw(rx_buffer->page);
+	prefetch_page_address(rx_buffer->page);

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -2295,6 +2295,19 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
	}
}

+/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
/**
 * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @rx_ring: rx descriptor ring to transact packets on