Commit 7079d5e6 authored by Jakub Kicinski

Merge tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2023-03-28

Dragos Tatulea says:
====================

net/mlx5e: RX, Drop page_cache and fully use page_pool

For page allocation on the rx path, the mlx5e driver has been using an
internal page cache in tandem with the page pool. The internal page
cache uses a queue for page recycling, which suffers from head-of-queue
blocking.
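
As a rough illustration only (not the mlx5e code; the struct and helper
names below are made up), a FIFO page cache can only reuse the page at
the head of the queue, so a single in-flight page blocks every page
queued behind it:

  /* Illustrative sketch of head-of-queue blocking in a FIFO page cache. */
  #include <linux/mm.h>
  #include <linux/page_ref.h>

  #define RX_CACHE_SIZE 128

  struct rx_page_cache {
          u32 head;
          u32 tail;
          struct page *pages[RX_CACHE_SIZE];
  };

  static struct page *rx_cache_get(struct rx_page_cache *cache)
  {
          struct page *page;

          if (cache->head == cache->tail)
                  return NULL;    /* cache empty */

          page = cache->pages[cache->head % RX_CACHE_SIZE];
          if (page_ref_count(page) != 1)
                  return NULL;    /* head still in use: everything behind it is blocked */

          cache->head++;
          return page;
  }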

This patch series drops the internal page_cache altogether and uses the
page_pool to implement everything that was done by the page_cache
before (see the sketch after this list):
* Let the page_pool handle dma mapping and unmapping.
* Use fragmented pages with fragment counter instead of tracking via
  page ref.
* Enable skb recycling.
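
A minimal sketch (not the mlx5e implementation; rx_frag and
rq_create_page_pool() are hypothetical names) of the three page_pool
features listed above: pool-owned DMA mapping/sync, the fragment
counter, and skb recycling:

  /* Illustrative only: the page_pool features the series relies on. */
  #include <linux/device.h>
  #include <linux/dma-direction.h>
  #include <linux/skbuff.h>
  #include <net/page_pool.h>

  struct rx_frag {
          struct page *page;
          u16 frags;      /* outstanding fragments on this page */
  };

  static struct page_pool *rq_create_page_pool(struct device *dev, int node)
  {
          struct page_pool_params pp = {
                  /* Let the pool own DMA mapping and device sync. */
                  .flags     = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
                  .order     = 0,
                  .pool_size = 256,
                  .nid       = node,
                  .dev       = dev,
                  .dma_dir   = DMA_FROM_DEVICE,
                  .max_len   = PAGE_SIZE,
          };

          return page_pool_create(&pp);
  }

  static int rx_frag_alloc(struct page_pool *pool, struct rx_frag *frag)
  {
          frag->page = page_pool_dev_alloc_pages(pool);
          if (!frag->page)
                  return -ENOMEM;

          /* Track users via the pool's fragment counter, not page refcounts. */
          page_pool_fragment_page(frag->page, 1);
          frag->frags = 1;
          return 0;
  }

  static void rx_finish_skb(struct sk_buff *skb)
  {
          /* Let the stack return the pages to the pool when the skb is freed. */
          skb_mark_for_recycle(skb);
  }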

The patch series has the following effects on the rx path:

* Improved performance for cases where page recycling was low due to
  head-of-queue blocking in the internal page_cache. The test for this
  was a single iperf TCP stream to an rx queue bound to the same CPU as
  the application.

  |-------------+--------+--------+------+---------|
  | rq type     | before | after  | unit |   diff  |
  |-------------+--------+--------+------+---------|
  | striding rq |  30.1  |  31.4  | Gbps |  4.14 % |
  | legacy rq   |  30.2  |  33.0  | Gbps |  8.48 % |
  |-------------+--------+--------+------+---------|

* Small XDP performance degradation. The test was an XDP drop program
  running on a single rx queue with small incoming packets; the results
  look like this:

  |-------------+----------+----------+------+---------|
  | rq type     | before   | after    | unit |   diff  |
  |-------------+----------+----------+------+---------|
  | striding rq | 19725449 | 18544617 | pps  | -6.37 % |
  | legacy rq   | 19879931 | 18631841 | pps  | -6.70 % |
  |-------------+----------+----------+------+---------|

  This will be handled in a different patch series by adding support for
  multi-packet per page.

* For other cases the performance is roughly the same.

The above numbers were obtained on the following system:
  24 core Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz
  32 GB RAM
  ConnectX-7 single port

The breakdown of the patch series is the following:
* Preparations for introducing the mlx5e_frag_page struct.
* Delete the mlx5e_page_cache struct.
* Enable dma mapping from page_pool.
* Enable skb recycling and fragment counting.
* Do deferred release of pages (just before alloc) to ensure better
  page_pool cache utilization (sketched just below).
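
A rough sketch of the deferred-release idea (hypothetical names, not the
actual mlx5e code): the page held by a WQE slot is released only when
that slot is about to be refilled, so it lands in the page_pool cache
right before the allocation that can reuse it:

  /* Illustrative only: deferred page release just before refill. */
  #include <linux/errno.h>
  #include <net/page_pool.h>

  struct wqe_slot {
          struct page *page;      /* page still owned by the previous WQE */
  };

  static int wqe_slot_refill(struct page_pool *pool, struct wqe_slot *slot)
  {
          /* Deferred release: return the old page right before allocating,
           * so it sits in the pool cache for the allocation below.
           */
          if (slot->page)
                  page_pool_put_full_page(pool, slot->page, true);

          slot->page = page_pool_dev_alloc_pages(pool);
          return slot->page ? 0 : -ENOMEM;
  }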

====================

* tag 'mlx5-updates-2023-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: RX, Remove unnecessary recycle parameter and page_cache stats
  net/mlx5e: RX, Break the wqe bulk refill in smaller chunks
  net/mlx5e: RX, Increase WQE bulk size for legacy rq
  net/mlx5e: RX, Split off release path for xsk buffers for legacy rq
  net/mlx5e: RX, Defer page release in legacy rq for better recycling
  net/mlx5e: RX, Change wqe last_in_page field from bool to bit flags
  net/mlx5e: RX, Defer page release in striding rq for better recycling
  net/mlx5e: RX, Rename xdp_xmit_bitmap to a more generic name
  net/mlx5e: RX, Enable skb page recycling through the page_pool
  net/mlx5e: RX, Enable dma map and sync from page_pool allocator
  net/mlx5e: RX, Remove internal page_cache
  net/mlx5e: RX, Store SHAMPO header pages in array
  net/mlx5e: RX, Remove alloc unit layout constraint for striding rq
  net/mlx5e: RX, Remove alloc unit layout constraint for legacy rq
  net/mlx5e: RX, Remove mlx5e_alloc_unit argument in page allocation
====================

Link: https://lore.kernel.org/r/20230328205623.142075-1-saeed@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents c5370374 3905f8d6
+0 −26
@@ -346,32 +346,6 @@ the software port.
     - The number of receive packets with CQE compression on ring i [#accel]_.
     - Acceleration

   * - `rx[i]_cache_reuse`
     - The number of events of successful reuse of a page from a driver's
       internal page cache.
     - Acceleration

   * - `rx[i]_cache_full`
     - The number of events of full internal page cache where driver can't put a
       page back to the cache for recycling (page will be freed).
     - Acceleration

   * - `rx[i]_cache_empty`
     - The number of events where cache was empty - no page to give. Driver
       shall allocate new page.
     - Acceleration

   * - `rx[i]_cache_busy`
     - The number of events where cache head was busy and cannot be recycled.
       Driver allocated new page.
     - Acceleration

   * - `rx[i]_cache_waive`
     - The number of cache evacuation. This can occur due to page move to
       another NUMA node or page was pfmemalloc-ed and should be freed as soon
       as possible.
     - Acceleration

   * - `rx[i]_arfs_err`
     - Number of flow rules that failed to be added to the flow table.
     - Error
+32 −19
@@ -475,11 +475,6 @@ struct mlx5e_txqsq {
	cqe_ts_to_ns               ptp_cyc2time;
} ____cacheline_aligned_in_smp;

union mlx5e_alloc_unit {
	struct page *page;
	struct xdp_buff *xsk;
};

/* XDP packets can be transmitted in different ways. On completion, we need to
 * distinguish between them to clean up things in a proper way.
 */
@@ -605,16 +600,35 @@ struct mlx5e_icosq {
	struct work_struct         recover_work;
} ____cacheline_aligned_in_smp;

struct mlx5e_frag_page {
	struct page *page;
	u16 frags;
};

enum mlx5e_wqe_frag_flag {
	MLX5E_WQE_FRAG_LAST_IN_PAGE,
	MLX5E_WQE_FRAG_SKIP_RELEASE,
};

struct mlx5e_wqe_frag_info {
	union mlx5e_alloc_unit *au;
	union {
		struct mlx5e_frag_page *frag_page;
		struct xdp_buff **xskp;
	};
	u32 offset;
	bool last_in_page;
	u8 flags;
};

union mlx5e_alloc_units {
	DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages);
	DECLARE_FLEX_ARRAY(struct page *, pages);
	DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs);
};

struct mlx5e_mpw_info {
	u16 consumed_strides;
	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
	union mlx5e_alloc_unit alloc_units[];
	DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE);
	union mlx5e_alloc_units alloc_units;
};

#define MLX5E_MAX_RX_FRAGS 4
@@ -625,11 +639,6 @@ struct mlx5e_mpw_info {
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
			  MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
#define MLX5E_CACHE_SIZE	(4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
struct mlx5e_page_cache {
	u32 head;
	u32 tail;
	struct page *page_cache[MLX5E_CACHE_SIZE];
};

struct mlx5e_rq;
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
@@ -661,19 +670,24 @@ struct mlx5e_rq_frags_info {
	struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
	u8 num_frags;
	u8 log_num_frags;
	u8 wqe_bulk;
	u16 wqe_bulk;
	u16 refill_unit;
	u8 wqe_index_mask;
};

struct mlx5e_dma_info {
	dma_addr_t addr;
	union {
		struct mlx5e_frag_page *frag_page;
		struct page *page;
	};
};

struct mlx5e_shampo_hd {
	u32 mkey;
	struct mlx5e_dma_info *info;
	struct page *last_page;
	struct mlx5e_frag_page *pages;
	u16 curr_page_index;
	u16 hd_per_wq;
	u16 hd_per_wqe;
	unsigned long *bitmap;
@@ -702,7 +716,7 @@ struct mlx5e_rq {
		struct {
			struct mlx5_wq_cyc          wq;
			struct mlx5e_wqe_frag_info *frags;
			union mlx5e_alloc_unit     *alloc_units;
			union mlx5e_alloc_units    *alloc_units;
			struct mlx5e_rq_frags_info  info;
			mlx5e_fp_skb_from_cqe       skb_from_cqe;
		} wqe;
@@ -738,7 +752,6 @@ struct mlx5e_rq {
	struct mlx5e_rq_stats *stats;
	struct mlx5e_cq        cq;
	struct mlx5e_cq_decomp cqd;
	struct mlx5e_page_cache page_cache;
	struct hwtstamp_config *tstamp;
	struct mlx5_clock      *clock;
	struct mlx5e_icosq    *icosq;
+49 −4
@@ -667,6 +667,48 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
	return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
}

static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
					     struct mlx5e_rq_frags_info *info)
{
	u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4;
	u32 bulk_bound_rq_size_in_bytes;
	u32 sum_frag_strides = 0;
	u32 wqe_bulk_in_bytes;
	u16 split_factor;
	u32 wqe_bulk;
	int i;

	for (i = 0; i < info->num_frags; i++)
		sum_frag_strides += info->arr[i].frag_stride;

	/* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect
	 * amount of consumed pages per wqe in bytes.
	 */
	if (sum_frag_strides > PAGE_SIZE)
		sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE);

	bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides;

#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024)

	/* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP
	 * keep bulk size smaller to avoid filling the page_pool cache on
	 * every bulk refill.
	 */
	wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog),
				  bulk_bound_rq_size_in_bytes);
	wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides);

	/* Make sure that allocations don't start when the page is still used
	 * by older WQEs.
	 */
	info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk);

	split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog),
				    PP_ALLOC_CACHE_REFILL * PAGE_SIZE);
	info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor);
}

#define DEFAULT_FRAG_SIZE (2048)

static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
@@ -774,11 +816,14 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
	}

out:
	/* Bulking optimization to skip allocation until at least 8 WQEs can be
	 * allocated in a row. At the same time, never start allocation when
	 * the page is still used by older WQEs.
	/* Bulking optimization to skip allocation until a large enough number
	 * of WQEs can be allocated in a row. Bulking also influences how well
	 * deferred page release works.
	 */
	info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
	mlx5e_rx_compute_wqe_bulk_params(params, info);

	mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n",
		      __func__, info->wqe_bulk, info->refill_unit);

	info->log_num_frags = order_base_2(info->num_frags);

+2 −2
@@ -121,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)

	mlx5e_reset_icosq_cc_pc(icosq);

	mlx5e_free_rx_in_progress_descs(rq);
	mlx5e_free_rx_missing_descs(rq);
	if (xskrq)
		mlx5e_free_rx_in_progress_descs(xskrq);
		mlx5e_free_rx_missing_descs(xskrq);

	clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
	mlx5e_activate_icosq(icosq);
+2 −4
@@ -65,13 +65,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);

/* RX */
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq);

static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -489,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size

static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
{
	size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
	size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe);

	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
}