net/mlx5e: Support enhanced CQE compression (2c925db0) · Commits · EulixOS / Software / Kernel

drivers/net/ethernet/mellanox/mlx5/core/en.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -344,6 +344,7 @@ enum {
		MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
		MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
		MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
		MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */
		};

		struct mlx5e_cq {
		@@ -370,6 +371,7 @@ struct mlx5e_cq_decomp {
		u8 mini_arr_idx;
		u16 left;
		u16 wqe_counter;
		bool last_cqe_title;
		} ____cacheline_aligned_in_smp;

		enum mlx5e_dma_map_type {

drivers/net/ethernet/mellanox/mlx5/core/en/params.c

+8 −2

Original line number	Diff line number	Diff line
		@@ -608,13 +608,15 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;

		mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
		mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d %s)\n",
		params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
		params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
		BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
		BIT(params->log_rq_mtu_frames),
		BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS),
		MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ?
		"enhanced" : "basic");
		}

		void mlx5e_set_rq_type(struct mlx5_core_dev mdev, struct mlx5e_params params)
		@@ -852,6 +854,10 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
		if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
		MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
		MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
		MLX5_SET(cqc, cqc, cqe_compression_layout,
		MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ?
		MLX5_CQE_COMPRESS_LAYOUT_ENHANCED :
		MLX5_CQE_COMPRESS_LAYOUT_BASIC);
		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
		}

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

+8 −0

Original line number	Diff line number	Diff line
		@@ -1205,6 +1205,13 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
		MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);

		/* For enhanced CQE compression packet processing. decompress
		* session according to the enhanced layout.
		*/
		if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) &&
		MLX5_CAP_GEN(mdev, enhanced_cqe_compression))
		__set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state);

		return 0;

		err_destroy_rq:
		@@ -1895,6 +1902,7 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
		cqe->validity_iteration_count = 0xff;
		}

		cq->mdev = mdev;

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

+129 −21

Original line number	Diff line number	Diff line
		@@ -89,6 +89,25 @@ static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
		memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
		}

		/* Save @cqe as the decompression title of @rq and, when the
		 * MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX state bit is clear, pre-compute
		 * cqd->wqe_counter from the saved title.
		 *
		 * With the bit set, only the title copy is performed.
		 */
		static void mlx5e_read_enhanced_title_slot(struct mlx5e_rq *rq,
							   struct mlx5_cqe64 *cqe)
		{
			struct mlx5e_cq_decomp *decomp = &rq->cqd;
			struct mlx5_cqe64 *saved = &decomp->title;

			/* Keep a private copy; later work reads the title from here. */
			memcpy(saved, cqe, sizeof(*saved));

			if (unlikely(!test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state))) {
				if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
					/* Non-striding RQ: index of the title's WQE counter plus one. */
					decomp->wqe_counter =
						mlx5_wq_cyc_ctr2ix(&rq->wqe.wq,
								   be16_to_cpu(saved->wqe_counter) + 1);
				} else {
					/* Striding RQ: stride index plus the strides the title consumed. */
					decomp->wqe_counter = mpwrq_get_cqe_stride_index(saved) +
							      mpwrq_get_cqe_consumed_strides(saved);
				}
			}
		}

		static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
		struct mlx5_cqwq *wq,
		u32 cqcc)
		@@ -175,6 +194,38 @@ static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
		cqd->title.rss_hash_result = 0;
		}

		/* Process one compressed CQE of an enhanced-layout compression session.
		 *
		 * @rq:         receive queue; its cqd decompression state and stats are updated
		 * @wq:         completion queue work queue; wq->cc is advanced on success
		 * @cqe:        the compressed CQE holding the mini CQE array
		 * @budget_rem: remaining NAPI budget for this poll
		 *
		 * Returns the number of mini CQEs handled, or @budget_rem (with nothing
		 * processed and wq->cc untouched) when the block does not fit in the
		 * remaining budget.
		 *
		 * NOTE(review): 'left' is u32 and 'budget_rem' is int, so the comparison
		 * below is done in unsigned arithmetic; this assumes budget_rem is never
		 * negative - confirm against callers.
		 */
		static u32 mlx5e_decompress_enhanced_cqe(struct mlx5e_rq *rq,
		struct mlx5_cqwq *wq,
		struct mlx5_cqe64 *cqe,
		int budget_rem)
		{
		struct mlx5e_cq_decomp *cqd = &rq->cqd;
		u32 cqcc, left;
		u32 i;

		/* Number of mini CQEs reported for this compressed CQE. */
		left = get_cqe_enhanced_num_mini_cqes(cqe);
		/* Here we avoid breaking the cqe compression session in the middle
		* in case budget is not sufficient to handle all of it. In this case
		* we return work_done == budget_rem to give 'busy' napi indication.
		*/
		if (unlikely(left > budget_rem))
		return budget_rem;

		/* Work on a local copy of the consumer counter; published after the loop. */
		cqcc = wq->cc;
		cqd->mini_arr_idx = 0;
		/* Copy one struct mlx5_cqe64 worth of bytes from @cqe into the mini array. */
		memcpy(cqd->mini_arr, cqe, sizeof(struct mlx5_cqe64));
		/* One iteration per mini CQE: update cqd->title, then hand it to the RX handler. */
		for (i = 0; i < left; i++, cqd->mini_arr_idx++, cqcc++) {
		mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
		mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
		rq, &cqd->title);
		}
		/* Publish the advanced consumer counter and account the handled packets. */
		wq->cc = cqcc;
		rq->stats->cqe_compress_pkts += left;

		return left;
		}

		static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
		struct mlx5_cqwq *wq,
		int update_owner_only,
		@@ -220,7 +271,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
		rq, &cqd->title);
		cqd->mini_arr_idx++;

		return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
		return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem);
		}

		static inline bool mlx5e_rx_cache_put(struct mlx5e_rq rq, struct page page)
		@@ -2211,45 +2262,102 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
		mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
		}

		int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
		static int mlx5e_rx_cq_process_enhanced_cqe_comp(struct mlx5e_rq *rq,
		struct mlx5_cqwq *cqwq,
		int budget_rem)
		{
		struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
		struct mlx5_cqwq *cqwq = &cq->wq;
		struct mlx5_cqe64 *cqe;
		struct mlx5_cqe64 cqe, title_cqe = NULL;
		struct mlx5e_cq_decomp *cqd = &rq->cqd;
		int work_done = 0;

		if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return 0;
		cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq);
		if (!cqe)
		return work_done;

		if (cqd->last_cqe_title &&
		(mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED)) {
		rq->stats->cqe_compress_blks++;
		cqd->last_cqe_title = false;
		}

		if (rq->cqd.left) {
		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
		if (work_done >= budget)
		goto out;
		do {
		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
		if (title_cqe) {
		mlx5e_read_enhanced_title_slot(rq, title_cqe);
		title_cqe = NULL;
		rq->stats->cqe_compress_blks++;
		}
		work_done +=
		mlx5e_decompress_enhanced_cqe(rq, cqwq, cqe,
		budget_rem - work_done);
		continue;
		}
		title_cqe = cqe;
		mlx5_cqwq_pop(cqwq);

		cqe = mlx5_cqwq_get_cqe(cqwq);
		if (!cqe) {
		if (unlikely(work_done))
		goto out;
		return 0;
		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
		mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
		rq, cqe);
		work_done++;
		} while (work_done < budget_rem &&
		(cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)));

		/* last cqe might be title on next poll bulk */
		if (title_cqe) {
		mlx5e_read_enhanced_title_slot(rq, title_cqe);
		cqd->last_cqe_title = true;
		}

		do {
		return work_done;
		}

		static int mlx5e_rx_cq_process_basic_cqe_comp(struct mlx5e_rq *rq,
		struct mlx5_cqwq *cqwq,
		int budget_rem)
		{
		struct mlx5_cqe64 *cqe;
		int work_done = 0;

		if (rq->cqd.left)
		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget_rem);

		while (work_done < budget_rem && (cqe = mlx5_cqwq_get_cqe(cqwq))) {
		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
		work_done +=
		mlx5e_decompress_cqes_start(rq, cqwq,
		budget - work_done);
		budget_rem - work_done);
		continue;
		}

		mlx5_cqwq_pop(cqwq);

		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
		mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
		rq, cqe);
		} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
		work_done++;
		}

		return work_done;
		}

		int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
		{
		struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
		struct mlx5_cqwq *cqwq = &cq->wq;
		int work_done;

		if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return 0;

		if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state))
		work_done = mlx5e_rx_cq_process_enhanced_cqe_comp(rq, cqwq,
		budget);
		else
		work_done = mlx5e_rx_cq_process_basic_cqe_comp(rq, cqwq,
		budget);

		if (work_done == 0)
		return 0;

		out:
		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
		mlx5e_shampo_flush_skb(rq, NULL, false);

drivers/net/ethernet/mellanox/mlx5/core/wq.h

+17 −0

Original line number	Diff line number	Diff line
		@@ -243,6 +243,23 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
		return cqe;
		}

		/* Peek at the CQE at the current consumer index of @wq and return it only
		 * if its validity_iteration_count byte equals the low 8 bits of the
		 * software wrap count.
		 *
		 * @wq: completion queue work queue to inspect
		 *
		 * Returns a pointer to the CQE when the validity byte matches, or NULL
		 * otherwise. The consumer index is not advanced here.
		 */
		static inline
		struct mlx5_cqe64 *mlx5_cqwq_get_cqe_enahnced_comp(struct mlx5_cqwq *wq)
		{
			u8 sw_validity_iteration_count = mlx5_cqwq_get_wrap_cnt(wq) & 0xff;
			u32 ci = mlx5_cqwq_get_ci(wq);
			struct mlx5_cqe64 *cqe;

			cqe = mlx5_cqwq_get_wqe(wq, ci);
			/* Validity byte does not match the software wrap count: no CQE to return. */
			if (cqe->validity_iteration_count != sw_validity_iteration_count)
				return NULL;

			/* ensure cqe content is read after cqe ownership bit/validity byte */
			dma_rmb();

			return cqe;
		}

		static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
		{
		return (u32)wq->fbc.sz_m1 + 1;