Commit 6690c2c4 authored by Jakub Kicinski

Merge branch 'mlx5-xsk-updates-part2-2022-09-28'

Saeed Mahameed says:

====================
mlx5 xsk updates part2 2022-09-28

XSK buffer improvements. This is part #2 of a 4-part series.

 1) Expose the XSK minimum chunk size to drivers, to allow the driver to
    choose a better buffer stride size.

 2) Adjust the MTT page size to the XSK frame size, to avoid UMEM overrun in
    certain situations.

 3) Use the XSK frame size as the striding RQ page size for XSK RQs (see the
    sketch below the cover letter).

 4) KSM for unaligned XSK: KSM allows registration of arbitrary buffer chunk
    lengths in HW, which makes more sense for unaligned XSK.

 5) More cleanups and optimizations in preparation for the next improvements
    in part 3.

part 1: https://lore.kernel.org/netdev/20220927203611.244301-1-saeed@kernel.org/
====================
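
As a rough illustration of points 2 and 3 (a simplified sketch, not code from
the series; pick_page_shift() and hw_min_page_shift are made-up names, the
real logic is mlx5e_mpwrq_page_shift() in en/params.c below):

	/* Derive the striding-RQ "page" size from the XSK frame size instead
	 * of always using PAGE_SIZE. chunk_size == 0 means a regular RQ.
	 */
	static u8 pick_page_shift(u32 chunk_size, u8 hw_min_page_shift)
	{
		u8 req = chunk_size ? order_base_2(chunk_size) : PAGE_SHIFT;

		/* The device caps how small a mapped page may be. */
		return max(req, hw_min_page_shift);
	}

With 2048-byte XSK frames, on a device that supports 2K mkey pages, this
yields a page shift of 11: every UMR entry then maps exactly one frame, so
the NIC cannot overrun into a neighbouring UMEM chunk.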

Link: https://lore.kernel.org/r/20220929072156.93299-1-saeed@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 0f5ef005 8f5ed1c1
+38 −34
@@ -93,28 +93,30 @@ struct page_pool;
#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
	MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))

#define MLX5_MPWRQ_LOG_WQE_SZ			18
#define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
#define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18

/* Keep in sync with mlx5e_mpwrq_log_wqe_sz.
 * These are theoretical maximums, which can be further restricted by
 * capabilities. These values are used for static resource allocations and
 * sanity checks.
 * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE
 * size actually used at runtime, but it's not a problem when calculating static
 * array sizes.
 */
#define MLX5_UMR_MAX_MTT_SPACE \
	(ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \
		    MLX5_UMR_MTT_ALIGNMENT))
#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \
	rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt))
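
/* Rough worked example with assumed values (not part of the patch): with
 * MLX5_SEND_WQE_MAX_SIZE = 1024, a 128-byte struct mlx5e_umr_wqe header and
 * 64-byte MTT alignment, MLX5_UMR_MAX_MTT_SPACE = ALIGN_DOWN(896, 64) = 896
 * bytes; 8-byte MTT entries give 112 entries, rounded down to a power of two:
 * MLX5_MPWRQ_MAX_PAGES_PER_WQE = 64. That is the theoretical ceiling checked
 * by the BUILD_BUG_ON() in mlx5e_mpwrq_pages_per_wqe().
 */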

#define MLX5_ALIGN_MTTS(mtts)		(ALIGN(mtts, 8))
#define MLX5_ALIGNED_MTTS_OCTW(mtts)	((mtts) / 2)
#define MLX5_MTT_OCTW(mtts)		(MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
 * WQEs, This page will absorb write overflow by the hardware, when
 * receiving packets larger than MTU. These oversize packets are
 * dropped by the driver at a later stage.
 */
#define MLX5E_REQUIRED_WQE_MTTS		(MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1))
#define MLX5_KSM_OCTW(ksms)             (ksms)
#define MLX5E_MAX_RQ_NUM_MTTS	\
	(ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */
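/* Illustration (assumed layout): an octword is 16 bytes, so two 8-byte MTTs
 * pack into one octword and MLX5_MTT_OCTW() halves the (8-aligned) MTT count.
 * ALIGN_DOWN(U16_MAX, 4) * 2 = 131064 MTTs thus translate to 65532 octwords,
 * which still fits the u16 octword count. A KSM entry fills a whole octword,
 * so MLX5_KSM_OCTW(ksms) == ksms and the limit is simply U16_MAX - 1.
 */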
#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW	\
		(ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
	(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
	 (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))

#define MLX5E_MIN_SKB_FRAG_SZ		(MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
#define MLX5E_LOG_MAX_RX_WQE_BULK	\
@@ -126,8 +128,7 @@ struct page_pool;

#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE                0xa
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd,	\
					       MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE		0xd

#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2

@@ -266,6 +267,7 @@ struct mlx5e_umr_wqe {
	union {
		DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts);
		DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms);
		DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms);
	};
};

@@ -472,13 +474,10 @@ struct mlx5e_txqsq {
	cqe_ts_to_ns               ptp_cyc2time;
} ____cacheline_aligned_in_smp;

struct mlx5e_dma_info {
	dma_addr_t addr;
	union {
union mlx5e_alloc_unit {
	struct page *page;
	struct xdp_buff *xsk;
};
};

/* XDP packets can be transmitted in different ways. On completion, we need to
 * distinguish between them to clean up things in a proper way.
@@ -606,15 +605,15 @@ struct mlx5e_icosq {
} ____cacheline_aligned_in_smp;

struct mlx5e_wqe_frag_info {
	struct mlx5e_dma_info *di;
	union mlx5e_alloc_unit *au;
	u32 offset;
	bool last_in_page;
};

struct mlx5e_mpw_info {
	u16 consumed_strides;
	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
	struct mlx5e_dma_info dma_info[];
	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
	union mlx5e_alloc_unit alloc_units[];
};

#define MLX5E_MAX_RX_FRAGS 4
@@ -622,13 +621,13 @@ struct mlx5e_mpw_info {
/* a single cache unit is capable to serve one napi call (for non-striding rq)
 * or a MPWQE (for striding rq).
 */
#define MLX5E_CACHE_UNIT	(MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
				 MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
			  MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_SIZE	(4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
struct mlx5e_page_cache {
	u32 head;
	u32 tail;
	struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
	struct page *page_cache[MLX5E_CACHE_SIZE];
};

struct mlx5e_rq;
@@ -663,6 +662,11 @@ struct mlx5e_rq_frags_info {
	u8 wqe_bulk;
};

struct mlx5e_dma_info {
	dma_addr_t addr;
	struct page *page;
};

struct mlx5e_shampo_hd {
	u32 mkey;
	struct mlx5e_dma_info *info;
@@ -688,7 +692,7 @@ struct mlx5e_rq {
		struct {
			struct mlx5_wq_cyc          wq;
			struct mlx5e_wqe_frag_info *frags;
			struct mlx5e_dma_info      *di;
			union mlx5e_alloc_unit     *alloc_units;
			struct mlx5e_rq_frags_info  info;
			mlx5e_fp_skb_from_cqe       skb_from_cqe;
		} wqe;
@@ -697,6 +701,7 @@ struct mlx5e_rq {
			struct mlx5e_umr_wqe   umr_wqe;
			struct mlx5e_mpw_info *info;
			mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
			__be32                 umr_mkey_be;
			u16                    num_strides;
			u16                    actual_wq_head;
			u8                     log_stride_sz;
@@ -708,6 +713,7 @@ struct mlx5e_rq {
			u8                     pages_per_wqe;
			u8                     umr_wqebbs;
			u8                     mtts_per_wqe;
			u8                     unaligned;
			struct mlx5e_shampo_hd *shampo;
		} mpwqe;
	};
@@ -758,7 +764,6 @@ struct mlx5e_rq {
	u32                    rqn;
	struct mlx5_core_dev  *mdev;
	struct mlx5e_channel  *channel;
	u32  umr_mkey;
	struct mlx5e_dma_info  wqe_overflow;

	/* XDP read-mostly */
@@ -1008,7 +1013,8 @@ struct mlx5e_profile {

void mlx5e_build_ptys2ethtool_map(void);

bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
					    bool unaligned);

void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
@@ -1138,8 +1144,6 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
		       struct mlx5e_rq *drop_rq);
void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
void mlx5e_free_di_list(struct mlx5e_rq *rq);

int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
+248 −51
@@ -6,16 +6,112 @@
#include "en/port.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec.h"
#include <net/xdp_sock_drv.h>

u16 mlx5e_mpwrq_umr_wqe_sz(u8 pages_per_wqe)
static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
{
	return sizeof(struct mlx5e_umr_wqe) +
		ALIGN(pages_per_wqe * sizeof(struct mlx5_mtt), MLX5_UMR_MTT_ALIGNMENT);
	u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);

	return min_page_shift ? : 12;
}

u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
{
	u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
	u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);

	/* Regular RQ uses order-0 pages, the NIC must be able to map them. */
	if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
		min_page_shift = req_page_shift;

	return max(req_page_shift, min_page_shift);
}

u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	u8 umr_entry_size = unaligned ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
	u8 max_pages_per_wqe, max_log_mpwqe_size;
	u16 max_wqe_size;

	/* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
	max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
	max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
				       MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
	max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;

	WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);

	return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
}

u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned);
	u8 pages_per_wqe;

	pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;

	/* Sanity check for further calculations to succeed. */
	BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
	if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
		return MLX5_MPWRQ_MAX_PAGES_PER_WQE;

	return pages_per_wqe;
}

u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	u8 umr_entry_size = unaligned ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
	u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, unaligned);
	u16 umr_wqe_sz;

	umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
		ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);

	WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);

	return umr_wqe_sz;
}
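
/* Illustrative sizes (assumed, not from the patch): struct mlx5_mtt is 8
 * bytes and struct mlx5_ksm is 16 bytes, with a 128-byte fixed UMR WQE
 * header. 64 pages with MTT entries: 128 + ALIGN(512, 64) = 640 bytes, i.e.
 * 10 WQEBBs; the same 64 pages with KSM entries would need 128 + 1024 = 1152
 * bytes, which is why mlx5e_mpwrq_log_wqe_sz() divides the available UMR
 * space by umr_entry_size and ends up with fewer pages per WQE in unaligned
 * (KSM) mode.
 */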

u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, unaligned),
			    MLX5_SEND_WQE_BB);
}

u8 mlx5e_mpwrq_umr_wqebbs(u8 pages_per_wqe)
u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(pages_per_wqe), MLX5_SEND_WQE_BB);
	/* Add another page as a buffer between WQEs. This page will absorb
	 * write overflow by the hardware, when receiving packets larger than
	 * MTU. These oversize packets are dropped by the driver at a later
	 * stage.
	 */
	return MLX5_ALIGN_MTTS(mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, unaligned) + 1);
}

u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, bool unaligned)
{
	if (unaligned)
		return min(MLX5E_MAX_RQ_NUM_KSMS,
			   1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));

	return MLX5E_MAX_RQ_NUM_MTTS;
}

static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
				      bool unaligned)
{
	u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, unaligned);
	u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, unaligned);

	return ilog2(max_entries / mtts_per_wqe);
}

u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, bool unaligned)
{
	return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, unaligned) +
		mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) -
		MLX5E_ORDER2_MAX_PACKET_MTU;
}

u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
@@ -52,14 +148,16 @@ static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
	return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
}

static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params,
					 struct mlx5e_xsk_param *xsk)
static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
					 struct mlx5e_params *params,
					 struct mlx5e_xsk_param *xsk,
					 bool mpwqe)
{
	/* XSK frames are mapped as individual pages, because frames may come in
	 * an arbitrary order from random locations in the UMEM.
	 */
	if (xsk)
		return PAGE_SIZE;
		return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;

	/* XDP in mlx5e doesn't support multiple packets per page. */
	if (params->xdp_prog)
@@ -68,15 +166,20 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params,
	return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
}

static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
				       struct mlx5e_params *params,
				       struct mlx5e_xsk_param *xsk)
{
	u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(params, xsk);
	u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	bool unaligned = xsk ? xsk->unaligned : false;

	return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_stride_sz);
	return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) -
		order_base_2(linear_stride_sz);
}

bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
			    struct mlx5e_params *params,
			    struct mlx5e_xsk_param *xsk)
{
	if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
@@ -96,9 +199,11 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
}

static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
					  u8 log_stride_sz, u8 log_num_strides)
					  u8 log_stride_sz, u8 log_num_strides,
					  u8 page_shift, bool unaligned)
{
	if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ)
	if (log_stride_sz + log_num_strides !=
	    mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned))
		return false;

	if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
@@ -118,28 +223,53 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
				  struct mlx5e_params *params,
				  struct mlx5e_xsk_param *xsk)
{
	s8 log_num_strides;
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	bool unaligned = xsk ? xsk->unaligned : false;
	u8 log_num_strides;
	u8 log_stride_sz;
	u8 log_wqe_sz;

	if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
		return false;

	log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
	log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned);

	if (!mlx5e_rx_is_linear_skb(params, xsk))
	if (log_wqe_sz < log_stride_sz)
		return false;

	log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(params, xsk));
	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
	log_num_strides = log_wqe_sz - log_stride_sz;

	return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides);
	return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
					     log_num_strides, page_shift,
					     unaligned);
}

u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
			       struct mlx5e_params *params,
			       struct mlx5e_xsk_param *xsk)
{
	u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
	u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
	bool unaligned = xsk ? xsk->unaligned : false;

	log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
	page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, unaligned);

	/* Numbers are unsigned, don't subtract to avoid underflow. */
	if (params->log_rq_mtu_frames <
	    log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;

	/* Ethtool's rx_max_pending is calculated for regular RQ, that uses
	 * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a
	 * frame size not equal to PAGE_SIZE.
	 * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on
	 * unexpected failure.
	 */
	if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
		return max_log_rq_size;

	return params->log_rq_mtu_frames - log_pkts_per_wqe;
}

@@ -169,7 +299,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
				   struct mlx5e_xsk_param *xsk)
{
	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
		return order_base_2(mlx5e_rx_get_linear_stride_sz(params, xsk));
		return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));

	return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -178,7 +308,10 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
				   struct mlx5e_params *params,
				   struct mlx5e_xsk_param *xsk)
{
	return MLX5_MPWRQ_LOG_WQE_SZ -
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	bool unaligned = xsk ? xsk->unaligned : false;

	return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, unaligned) -
		mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}

@@ -327,7 +460,9 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)

int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);

	if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, false))
		return -EOPNOTSUPP;

	if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
@@ -339,12 +474,27 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params
int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
			     struct mlx5e_xsk_param *xsk)
{
	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	bool unaligned = xsk ? xsk->unaligned : false;
	u16 max_mtu_pkts;

	if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, xsk->unaligned))
		return -EOPNOTSUPP;

	if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
		return -EINVAL;

	/* Current RQ length is too big for the given frame size, the
	 * needed number of WQEs exceeds the maximum.
	 */
	max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
			     mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
	if (params->log_rq_mtu_frames > max_mtu_pkts) {
		mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
			      1 << params->log_rq_mtu_frames, xsk->chunk_size);
		return -EINVAL;
	}

	return 0;
}

@@ -358,7 +508,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
		       BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
		       BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
		       BIT(params->log_rq_mtu_frames),
		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
@@ -385,7 +535,7 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
	     MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
	    !mlx5e_mpwrq_validate_regular(mdev, params) &&
	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
	     !mlx5e_rx_is_linear_skb(params, NULL)))
	     !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
	mlx5e_set_rq_type(mdev, params);
	mlx5e_init_rq_type_params(mdev, params);
@@ -428,10 +578,10 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
	int max_mtu;
	int i;

	if (mlx5e_rx_is_linear_skb(params, xsk)) {
	if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
		int frag_stride;

		frag_stride = mlx5e_rx_get_linear_stride_sz(params, xsk);
		frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);

		info->arr[0].frag_size = byte_count;
		info->arr[0].frag_stride = frag_stride;
@@ -528,7 +678,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
	u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
	int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
	u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
	int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk));
	int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
	int wqe_size = BIT(log_stride_sz) * num_strides;

	/* +1 is for the case that the pkt_per_rsrv dont consume the reservation
@@ -552,7 +702,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
		if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
			log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
		else
			log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
			log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
				mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
		break;
	default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -595,12 +745,16 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
		u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
		u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
		u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
		bool unaligned = xsk ? xsk->unaligned : false;

		if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
						   log_wqe_num_of_strides)) {
						   log_wqe_num_of_strides,
						   page_shift, unaligned)) {
			mlx5_core_err(mdev,
				      "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n",
				      log_wqe_stride_size, log_wqe_num_of_strides);
				      "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, unaligned %d\n",
				      log_wqe_stride_size, log_wqe_num_of_strides,
				      unaligned);
			return -EINVAL;
		}

@@ -608,7 +762,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
			 log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
		MLX5_SET(wq, wq, log_wqe_stride_size,
			 log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
		if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
			MLX5_SET(wq, wq, shampo_enable, true);
			MLX5_SET(wq, wq, log_reservation_size,
@@ -720,13 +874,6 @@ static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}

static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
{
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	return MLX5_GET(wq, wq, log_wq_sz);
}

/* This function calculates the maximum number of headers entries that are needed
 * per WQE, the formula is based on the size of the reservations and the
 * restriction we have about max packets for reservation that is equal to max
@@ -787,32 +934,82 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
	return wqebbs;
}

static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
					struct mlx5e_params *params,
					struct mlx5e_xsk_param *xsk)
{
	u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
	bool unaligned = xsk ? xsk->unaligned : false;
	u8 umr_wqebbs;

	umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, unaligned);

	return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
}

static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
				      struct mlx5e_params *params,
				      struct mlx5e_rq_param *rqp)
{
	u32 wqebbs;
	u32 wqebbs, total_pages, useful_space;

	/* MLX5_WQ_TYPE_CYCLIC */
	if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
		return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;

	wqebbs = mlx5e_mpwrq_umr_wqebbs(MLX5_MPWRQ_PAGES_PER_WQE) *
		(1 << mlx5e_get_rq_log_wq_sz(rqp->rqc));
	/* UMR WQEs for the regular RQ. */
	wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);

	/* If XDP program is attached, XSK may be turned on at any time without
	 * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
	 * both regular RQ and XSK RQ.
	 * Although mlx5e_mpwqe_get_log_rq_size accepts mlx5e_xsk_param, it
	 * doesn't affect its return value, as long as params->xdp_prog != NULL,
	 * so we can just multiply by 2.
	 *
	 * XSK uses different values of page_shift, and the total number of UMR
	 * WQEBBs depends on it. This dependency is complex and not monotonic,
	 * especially taking into consideration that some of the parameters come
	 * from capabilities. Hence, we have to try all valid values of XSK
	 * frame size (and page_shift) to find the maximum.
	 */
	if (params->xdp_prog)
		wqebbs *= 2;
	if (params->xdp_prog) {
		u32 max_xsk_wqebbs = 0;
		u8 frame_shift;

		for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
		     frame_shift <= PAGE_SHIFT; frame_shift++) {
			/* The headroom doesn't affect the calculation. */
			struct mlx5e_xsk_param xsk = {
				.chunk_size = 1 << frame_shift,
				.unaligned = false,
			};

			/* XSK aligned mode. */
			max_xsk_wqebbs = max(max_xsk_wqebbs,
				mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));

			/* XSK unaligned mode, frame size is a power of two. */
			xsk.unaligned = true;
			max_xsk_wqebbs = max(max_xsk_wqebbs,
				mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
		}

		wqebbs += max_xsk_wqebbs;
	}
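	/* Assuming XDP_UMEM_MIN_CHUNK_SHIFT == 11, the loop above has only two
	 * iterations on 4K-page systems (2K and 4K chunks), each evaluated for
	 * both the aligned and the unaligned layout; the largest UMR footprint
	 * wins.
	 */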

	if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
		wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);

	/* UMR WQEs don't cross the page boundary, they are padded with NOPs.
	 * This padding is always smaller than the max WQE size. That gives us
	 * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
	 * per page. The number of pages is estimated as the total size of WQEs
	 * divided by the useful space in page, rounding up. If some WQEs don't
	 * fully fit into the useful space, they can occupy part of the padding,
	 * which proves this estimation to be correct (reserve enough space).
	 */
	useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB;
	total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
	wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
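	/* Worked example with assumed values (4K pages, 64-byte WQEBBs, 1024-byte
	 * max SQ WQE): useful_space = 4096 - 1024 + 64 = 3136 bytes per page.
	 * 320 WQEBBs of UMR WQEs (20480 bytes) then need DIV_ROUND_UP(20480, 3136)
	 * = 7 pages, i.e. the ICOSQ is sized for 7 * 64 = 448 WQEBBs before being
	 * rounded up to a power of two below.
	 */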

	return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
}

@@ -866,7 +1063,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
	mlx5e_build_sq_param_common(mdev, param);
	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
	param->is_xdp_mb = !mlx5e_rx_is_linear_skb(params, xsk);
	param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}

+13 −4

File changed; diff collapsed (preview size limit exceeded).

+2 −5
@@ -162,10 +162,10 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
	mlx5e_free_rx_descs(rq);

	err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
	if (err)
		goto out;
		return err;

	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
	mlx5e_activate_rq(rq);
	rq->stats->recover++;
	if (rq->channel)
@@ -173,9 +173,6 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
	else
		mlx5e_trigger_napi_sched(rq->cq.napi);
	return 0;
out:
	clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
	return err;
}

static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+53 −53

File changed; diff collapsed (preview size limit exceeded).
