Commit 35187642 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'htb-offload'

Maxim Mikityanskiy says:

====================
HTB offload

This series adds support for HTB offload to the HTB qdisc, and adds
usage to mlx5 driver.

The previous RFCs are available at [1], [2].

The feature is intended to solve the performance bottleneck caused by
the single lock of the HTB qdisc, which prevents it from scaling well.
The HTB algorithm itself is offloaded to the device, eliminating the
need to take the root lock of HTB on every packet. Classification part
is done in clsact (still in software) to avoid acquiring the lock, which
imposes a limitation that filters can target only leaf classes.

The speedup on Mellanox ConnectX-6 Dx was 14.2 times in the UDP
multi-stream test, compared to software HTB implementation (more details
in the mlx5 patch).

[1]: https://www.spinics.net/lists/netdev/msg628422.html
[2]: https://www.spinics.net/lists/netdev/msg663548.html

v2 changes:

Fixed sparse and smatch warnings. Formatted HTB patches to 80 chars per
line.

v3 changes:

Fixed the CI failure on parisc with 16-bit xchg by replacing it with
WRITE_ONCE. Fixed the capability bits in mlx5_ifc.h and the value of
MLX5E_QOS_MAX_LEAF_NODES.

v4 changes:

Check if HTB is root when offloading. Add extack for hardware errors.
Rephrase explanations of how it works in the commit message. Remove %hu
from format strings. Add resiliency when leaf_del_last fails to create a
new leaf node.
====================

Link: https://lore.kernel.org/r/20210119120815.463334-1-maximmi@mellanox.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 04a88637 214baf22
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o
		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
		fw_reset.o qos.o

#
# Netdev basic
@@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o
		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
		en/qos.o

#
# Netdev extra
+26 −1
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@
#include "en_stats.h"
#include "en/dcbnl.h"
#include "en/fs.h"
#include "en/qos.h"
#include "lib/hv_vhca.h"

extern const struct net_device_ops mlx5e_netdev_ops;
@@ -161,6 +162,9 @@ do { \
			    ##__VA_ARGS__);                     \
} while (0)

#define mlx5e_state_dereference(priv, p) \
	rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))

enum mlx5e_rq_group {
	MLX5E_RQ_GROUP_REGULAR,
	MLX5E_RQ_GROUP_XSK,
@@ -663,11 +667,13 @@ struct mlx5e_channel {
	struct mlx5e_xdpsq         rq_xdpsq;
	struct mlx5e_txqsq         sq[MLX5E_MAX_NUM_TC];
	struct mlx5e_icosq         icosq;   /* internal control operations */
	struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
	bool                       xdp;
	struct napi_struct         napi;
	struct device             *pdev;
	struct net_device         *netdev;
	__be32                     mkey_be;
	u16                        qos_sqs_size;
	u8                         num_tc;
	u8                         lag_port;

@@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param {
	int next_state;
	int rl_update;
	int rl_index;
	bool qos_update;
	u16 qos_queue_group_id;
};

#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
@@ -788,10 +796,20 @@ struct mlx5e_scratchpad {
	cpumask_var_t cpumask;
};

struct mlx5e_htb {
	DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
	DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
	struct mlx5e_sq_stats **qos_sq_stats;
	u16 max_qos_sqs;
	u16 maj_id;
	u16 defcls;
};

struct mlx5e_priv {
	/* priv data path fields - start */
	/* +1 for port ptp ts */
	struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC];
	struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC +
				   MLX5E_QOS_MAX_LEAF_NODES];
	int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
	int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -859,6 +877,7 @@ struct mlx5e_priv {
	struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif
	struct mlx5e_scratchpad    scratchpad;
	struct mlx5e_htb           htb;
};

struct mlx5e_rx_handlers {
@@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
			       struct mlx5e_channels *new_chs,
			       mlx5e_fp_preactivate preactivate,
			       void *context);
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
@@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);

int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
		    struct mlx5e_modify_sq_param *p);
int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
		     struct mlx5e_params *params, struct mlx5e_sq_param *param,
		     struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid);
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
@@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param;
int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
			struct mlx5e_sq_param *param,
			struct mlx5e_create_sq_param *csp,
			u16 qos_queue_group_id,
			u32 *sqn);
void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);

static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
{
+2 −0
Original line number Diff line number Diff line
@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
			  struct mlx5e_rq_param *param);
void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
				 struct mlx5e_sq_param *param);
void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
			  struct mlx5e_sq_param *param);
void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
			     struct mlx5e_params *params,
			     struct mlx5e_xsk_param *xsk,
+1 −1
Original line number Diff line number Diff line
@@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
	csp.min_inline_mode = txqsq->min_inline_mode;
	csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;

	err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn);
	err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
	if (err)
		goto err_free_txqsq;

+984 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading