Commit 338c46c6 authored by Maxim Mikityanskiy, committed by Saeed Mahameed
Browse files

net/mlx5e: Support multiple SKBs in a TX WQE



TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:

1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.

2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer will be NULL, and the SKBs will be stored in the FIFO.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

When compiled with a recent GCC, this change shows no visible
performance impact on UDP pktgen (burst 32) single stream test either:
  Packet rate: 16.95 Mpps (±0.15 Mpps) -> 16.96 Mpps (±0.12 Mpps)
  Instructions per packet: 429 -> 421
  Cycles per packet: 160 -> 156
  Instructions per cycle: 2.69 -> 2.70

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 56e4da66
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -318,11 +318,13 @@ struct mlx5e_txqsq {

	/* dirtied @completion */
	u16                        cc;
	u16                        skb_fifo_cc;
	u32                        dma_fifo_cc;
	struct dim                 dim; /* Adaptive Moderation */

	/* dirtied @xmit */
	u16                        pc ____cacheline_aligned_in_smp;
	u16                        skb_fifo_pc;
	u32                        dma_fifo_pc;

	struct mlx5e_cq            cq;
@@ -330,9 +332,11 @@ struct mlx5e_txqsq {
	/* read only */
	struct mlx5_wq_cyc         wq;
	u32                        dma_fifo_mask;
	u16                        skb_fifo_mask;
	struct mlx5e_sq_stats     *stats;
	struct {
		struct mlx5e_sq_dma       *dma_fifo;
		struct sk_buff           **skb_fifo;
		struct mlx5e_tx_wqe_info  *wqe_info;
	} db;
	void __iomem              *uar_map;
+18 −0
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
	u32 num_bytes;
	u8 num_wqebbs;
	u8 num_dma;
	u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
	struct page *resync_dump_frag_page;
#endif
@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
	dma->type = map_type;
}

/* Translate SKB FIFO counter @i into a pointer to its slot in
 * sq->db.skb_fifo. The counter is wrapped by masking it with
 * sq->skb_fifo_mask, so callers may pass an unwrapped counter value.
 */
static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
{
	u16 slot = i & sq->skb_fifo_mask;

	return sq->db.skb_fifo + slot;
}

static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
{
	struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);

	*skb_item = skb;
}

static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
{
	return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
}

static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
+7 −3
Original line number Diff line number Diff line
@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
					   struct mlx5e_tx_wqe_info *wi,
					   u32 *dma_fifo_cc);
/* If wi->resync_dump_frag_page is set, pass the completion to
 * mlx5e_ktls_tx_handle_resync_dump_comp() and return true; otherwise do
 * nothing and return false. The boolean result lets the caller tell
 * whether this WQE was already handled here.
 */
static inline void
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
					  struct mlx5e_tx_wqe_info *wi,
					  u32 *dma_fifo_cc)
{
	if (unlikely(wi->resync_dump_frag_page))
	if (unlikely(wi->resync_dump_frag_page)) {
		mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
		return true;
	}
	return false;
}
#else
/* Stub used when CONFIG_MLX5_EN_TLS is not enabled: nothing is handled
 * here, so it always returns false.
 */
static inline void
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
					  struct mlx5e_tx_wqe_info *wi,
					  u32 *dma_fifo_cc)
{
	return false;
}

#endif /* CONFIG_MLX5_EN_TLS */
+6 −1
Original line number Diff line number Diff line
@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
/* Free the three arrays held in sq->db: the WQE info array, the SKB FIFO
 * and the DMA FIFO. The pointers in sq->db are not reset afterwards.
 * NOTE(review): mlx5e_alloc_txqsq_db() calls this on its allocation
 * failure path, where some of these pointers are presumably NULL.
 */
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
	kvfree(sq->db.wqe_info);
	kvfree(sq->db.skb_fifo);
	kvfree(sq->db.dma_fifo);
}

@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
	sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
						   sizeof(*sq->db.dma_fifo)),
					GFP_KERNEL, numa);
	sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
						   sizeof(*sq->db.skb_fifo)),
					GFP_KERNEL, numa);
	sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
						   sizeof(*sq->db.wqe_info)),
					GFP_KERNEL, numa);
	if (!sq->db.dma_fifo || !sq->db.wqe_info) {
	if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
		mlx5e_free_txqsq_db(sq);
		return -ENOMEM;
	}

	sq->dma_fifo_mask = df_sz - 1;
	sq->skb_fifo_mask = df_sz - 1;

	return 0;
}
+52 −17
Original line number Diff line number Diff line
@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		.num_bytes = attr->num_bytes,
		.num_dma = num_dma,
		.num_wqebbs = wqe_attr->num_wqebbs,
		.num_fifo_pkts = 0,
	};

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
	napi_consume_skb(skb, napi_budget);
}

/* Pop wi->num_fifo_pkts SKBs from the SQ's SKB FIFO, in FIFO order, and
 * hand each one to mlx5e_consume_skb() together with @cqe and
 * @napi_budget. Does nothing when wi->num_fifo_pkts is zero.
 */
static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int done = 0;

	while (done < wi->num_fifo_pkts) {
		mlx5e_consume_skb(sq, mlx5e_skb_fifo_pop(sq), cqe, napi_budget);
		done++;
	}
}

bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq_stats *stats;
@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			struct sk_buff *skb;

			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];
			skb = wi->skb;

			sqcc += wi->num_wqebbs;

			if (unlikely(!skb)) {
				mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
			if (likely(wi->skb)) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);

				npkts++;
				nbytes += wi->num_bytes;
				continue;
			}

			if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
									       &dma_fifo_cc)))
				continue;

			if (wi->num_fifo_pkts) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
				mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);

			npkts++;
				npkts += wi->num_fifo_pkts;
				nbytes += wi->num_bytes;
			}
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

/* Pop wi->num_fifo_pkts SKBs from the SQ's SKB FIFO, in FIFO order, and
 * free each one with dev_kfree_skb_any(). Does nothing when
 * wi->num_fifo_pkts is zero.
 */
static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int freed = 0;

	while (freed < wi->num_fifo_pkts) {
		struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);

		dev_kfree_skb_any(skb);
		freed++;
	}
}

void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	u32 dma_fifo_cc, nbytes = 0;
	u16 ci, sqcc, npkts = 0;
	struct sk_buff *skb;

	sqcc = sq->cc;
	dma_fifo_cc = sq->dma_fifo_cc;
@@ -605,21 +632,29 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
	while (sqcc != sq->pc) {
		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];
		skb = wi->skb;

		sqcc += wi->num_wqebbs;

		if (!skb) {
			mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
		if (likely(wi->skb)) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			dev_kfree_skb_any(wi->skb);

			npkts++;
			nbytes += wi->num_bytes;
			continue;
		}

		if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
			continue;

		if (wi->num_fifo_pkts) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
		dev_kfree_skb_any(skb);
			mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);

		npkts++;
			npkts += wi->num_fifo_pkts;
			nbytes += wi->num_bytes;
		}
	}

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;