Commit 5af75c74 authored by Maxim Mikityanskiy's avatar Maxim Mikityanskiy Committed by Saeed Mahameed
Browse files

net/mlx5e: Enhanced TX MPWQE for SKBs



This commit adds support for Enhanced TX MPWQE feature in the regular
(SKB) data path. A MPWQE (multi-packet work queue element) can serve
multiple packets, reducing the PCI bandwidth on control traffic.

Two new stats (tx*_mpwqe_blks and tx*_mpwqe_pkts) are added. The feature
is on by default and controlled by the skb_tx_mpwqe private flag.

In a MPWQE, eseg is shared among all packets, so eseg-based offloads
(IPSEC, GENEVE, checksum) run on a separate eseg that is compared to the
eseg of the current MPWQE session to decide if the new packet can be
added to the same session.

MPWQE is not compatible with certain offloads and features, such as TLS
offload, TSO, nonlinear SKBs. If such incompatible features are in use,
the driver gracefully falls back to non-MPWQE.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

UDP pktgen, 64-byte packets, single stream, MPWQE off:
  Packet rate: 16.96 Mpps (±0.12 Mpps) -> 17.01 Mpps (±0.20 Mpps)
  Instructions per packet: 421 -> 429
  Cycles per packet: 156 -> 161
  Instructions per cycle: 2.70 -> 2.67

UDP pktgen, 64-byte packets, single stream, MPWQE on:
  Packet rate: 16.96 Mpps (±0.12 Mpps) -> 20.94 Mpps (±0.33 Mpps)
  Instructions per packet: 421 -> 329
  Cycles per packet: 156 -> 123
  Instructions per cycle: 2.70 -> 2.67

Enabling MPWQE can reduce PCI bandwidth:
  PCI Gen2, pktgen at fixed rate of 36864000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 80.3%
    Inbound PCI utilization with MPWQE on: 59.0%
  PCI Gen3, pktgen at fixed rate of 56064000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 65.4%
    Inbound PCI utilization with MPWQE on: 49.3%

Enabling MPWQE can also reduce CPU load, increasing the packet rate in
case of CPU bottleneck:
  PCI Gen2, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 37.5 Mpps
    Packet rate with MPWQE on: 49.0 Mpps
  PCI Gen3, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 57.0 Mpps
    Packet rate with MPWQE on: 66.8 Mpps

Burst size in all pktgen tests is 32.

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0

Signed-off-by: default avatarMaxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 67044a88
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -221,6 +221,7 @@ enum mlx5e_priv_flag {
	MLX5E_PFLAG_RX_STRIDING_RQ,
	MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
	MLX5E_PFLAG_XDP_TX_MPWQE,
	MLX5E_PFLAG_SKB_TX_MPWQE,
	MLX5E_NUM_PFLAGS, /* Keep last */
};

@@ -305,6 +306,7 @@ struct mlx5e_sq_dma {

enum {
	MLX5E_SQ_STATE_ENABLED,
	MLX5E_SQ_STATE_MPWQE,
	MLX5E_SQ_STATE_RECOVERING,
	MLX5E_SQ_STATE_IPSEC,
	MLX5E_SQ_STATE_AM,
@@ -316,6 +318,7 @@ enum {
struct mlx5e_tx_mpwqe {
	/* Current MPWQE session */
	struct mlx5e_tx_wqe *wqe;
	u32 bytes_count;
	u8 ds_count;
	u8 pkt_count;
	u8 inline_on;
@@ -334,6 +337,7 @@ struct mlx5e_txqsq {
	u16                        pc ____cacheline_aligned_in_smp;
	u16                        skb_fifo_pc;
	u32                        dma_fifo_pc;
	struct mlx5e_tx_mpwqe      mpwqe;

	struct mlx5e_cq            cq;

+1 −0
Original line number Diff line number Diff line
@@ -278,6 +278,7 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}

void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);

static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
{
+1 −0
Original line number Diff line number Diff line
@@ -205,6 +205,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+1 −0
Original line number Diff line number Diff line
@@ -147,6 +147,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
	u32 dma_len = xdptxd->len;

	session->pkt_count++;
	session->bytes_count += dma_len;

	if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
		struct mlx5_wqe_inline_seg *inline_dseg =
+18 −11
Original line number Diff line number Diff line
@@ -128,31 +128,38 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
	return true;
}

static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
/* Part of the eseg touched by TX offloads */
#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)

static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
				       struct mlx5e_txqsq *sq,
				       struct sk_buff *skb,
					 struct mlx5e_tx_wqe *wqe,
					 struct mlx5e_accel_tx_state *state)
				       struct mlx5_wqe_eth_seg *eseg)
{
#ifdef CONFIG_MLX5_EN_TLS
	mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif

#ifdef CONFIG_MLX5_EN_IPSEC
	if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
		if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb)))
		if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, eseg, skb)))
			return false;
	}
#endif

#if IS_ENABLED(CONFIG_GENEVE)
	if (skb->encapsulation)
		mlx5e_tx_tunnel_accel(skb, &wqe->eth);
		mlx5e_tx_tunnel_accel(skb, eseg);
#endif

	return true;
}

static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
					 struct mlx5e_tx_wqe *wqe,
					 struct mlx5e_accel_tx_state *state)
{
#ifdef CONFIG_MLX5_EN_TLS
	mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif
}

static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
{
	return mlx5e_ktls_init_rx(priv);
Loading