Commit 9ac93627 authored by Eric Dumazet, committed by David S. Miller
Browse files

net/mlx4_en: avoid one cache line miss to ring doorbell



This patch caches the doorbell address directly in struct mlx4_en_tx_ring.

This removes the need to bring the whole struct mlx4_uar into CPU caches
in the fast path.

Note that mlx4_uar is not guaranteed to be on a local node,
because mlx4_bf_alloc() uses a single free list (priv->bf_list)
regardless of its node parameter.

This kind of change does matter in the presence of light/moderate traffic.
Under heavy load, this read-only line would be kept hot in caches anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0693b276
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
		ring->bf_enabled = !!(priv->pflags &
				      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
	}
	ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL;

	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
	ring->queue_index = queue_index;
@@ -753,8 +754,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
#else
	iowrite32be(
#endif
		  (__force u32)ring->doorbell_qpn,
		  ring->bf.uar->map + MLX4_SEND_DOORBELL);
		  (__force u32)ring->doorbell_qpn, ring->doorbell_address);
}

static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
+1 −0
Original line number Diff line number Diff line
@@ -283,6 +283,7 @@ struct mlx4_en_tx_ring {
	struct mlx4_bf		bf;

	/* Following part should be mostly read */
	void __iomem		*doorbell_address;
	__be32			doorbell_qpn;
	__be32			mr_key;
	u32			size; /* number of TXBBs */