Commit ffa501ef authored by Aharon Landau, committed by Jason Gunthorpe
Browse files

RDMA/mlx5: Add steering support in optional flow counters

Add steering infrastructure for adding and removing optional counters.
This allows the counters to be added and removed dynamically so that they
do not hurt performance.

Link: https://lore.kernel.org/r/20211008122439.166063-12-markzhang@nvidia.com


Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 886773d2
Loading
Loading
Loading
Loading
+187 −0
Original line number Diff line number Diff line
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
	return prio;
}

/*
 * Priority values handed to _get_prio() by mlx5_ib_fs_add_op_fc() when it
 * sets up an optional-counter flow table in the
 * MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS namespace.
 */
enum {
	/* Used for MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS. */
	RDMA_RX_ECN_OPCOUNTER_PRIO,
	/* Used for MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS. */
	RDMA_RX_CNP_OPCOUNTER_PRIO,
};

/*
 * Priority value handed to _get_prio() by mlx5_ib_fs_add_op_fc() when it
 * sets up an optional-counter flow table in the
 * MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS namespace.
 */
enum {
	/* Used for MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS. */
	RDMA_TX_CNP_OPCOUNTER_PRIO,
};

/*
 * Narrow @spec so that it only matches traffic whose source vHCA port is
 * @port_num.
 *
 * Matching on source_vhca_port must be advertised by both the RDMA RX and
 * the RDMA TX flow-table capabilities; if either is missing @spec is left
 * untouched.
 *
 * Return: 0 on success, -EOPNOTSUPP when the device lacks the capability.
 */
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
			      struct mlx5_flow_spec *spec)
{
	struct mlx5_core_dev *mdev = dev->mdev;

	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(mdev,
					ft_field_support.source_vhca_port))
		return -EOPNOTSUPP;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TX(mdev,
					ft_field_support.source_vhca_port))
		return -EOPNOTSUPP;

	/* Value first, then the all-ones mask that makes it significant. */
	MLX5_SET(fte_match_param, spec->match_value,
		 misc_parameters.source_vhca_port, port_num);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters.source_vhca_port);

	return 0;
}

/*
 * Build a match spec for packets of IP version @ipv whose outer ECN field
 * equals INET_ECN_CE.
 *
 * When multiport is enabled on the core device the spec is additionally
 * restricted to @port_num through set_vhca_port_spec().
 *
 * Return: 0 on success, -EOPNOTSUPP if the RDMA RX flow table cannot match
 * on outer_ip_version or the port restriction could not be applied.
 */
static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
			   struct mlx5_flow_spec *spec, int ipv)
{
	struct mlx5_core_dev *mdev = dev->mdev;

	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(mdev,
					ft_field_support.outer_ip_version))
		return -EOPNOTSUPP;

	if (mlx5_core_mp_enabled(mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	/* Match on the requested IP version ... */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_version);
	MLX5_SET(fte_match_param, spec->match_value,
		 outer_headers.ip_version, ipv);

	/* ... and on the ECN field being CE. */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_ecn);
	MLX5_SET(fte_match_param, spec->match_value,
		 outer_headers.ip_ecn, INET_ECN_CE);

	/* Derive the enable bits from whatever criteria are now populated. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}

/*
 * Build a match spec for packets whose BTH opcode is IB_BTH_OPCODE_CNP.
 *
 * When multiport is enabled on the core device the spec is additionally
 * restricted to @port_num through set_vhca_port_spec().
 *
 * Return: 0 on success, -EOPNOTSUPP if the port restriction could not be
 * applied.
 */
static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
			struct mlx5_flow_spec *spec)
{
	if (mlx5_core_mp_enabled(dev->mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	MLX5_SET(fte_match_param, spec->match_value,
		 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters.bth_opcode);

	/* Derive the enable bits from whatever criteria are now populated. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}

/**
 * mlx5_ib_fs_add_op_fc - install the steering rules of an optional counter
 * @dev: mlx5 IB device
 * @port_num: port the rules are restricted to when multiport is enabled
 * @opfc: optional counter; @opfc->fc supplies the counter id used as the
 *        rule destination and @opfc->rule[] receives the rule handles
 * @type: which optional counter to install
 *
 * Builds one match spec per rule (two for the CE counter, IPv4 and IPv6;
 * one for each CNP counter), obtains the flow table for @type and adds a
 * COUNT | ALLOW rule per spec that targets @opfc->fc.
 *
 * On failure every rule added so far is deleted and the corresponding
 * @opfc->rule[] slots are reset to NULL, so that no ERR_PTR or stale handle
 * is left behind for mlx5_ib_fs_remove_op_fc(), which treats any non-NULL
 * slot as a live rule.
 *
 * Return: 0 on success; -ENOMEM if the specs cannot be allocated;
 * -EOPNOTSUPP for an unknown @type, a missing capability or a missing
 * namespace; otherwise the error reported by _get_prio() or
 * mlx5_add_flow_rules().
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type)
{
	enum mlx5_flow_namespace_type fn_type;
	int priority, i, err, spec_num;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_spec *spec;

	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	switch (type) {
	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
		/* One rule per IP version. */
		if (set_ecn_ce_spec(dev, port_num, &spec[0],
				    MLX5_FS_IPV4_VERSION) ||
		    set_ecn_ce_spec(dev, port_num, &spec[1],
				    MLX5_FS_IPV6_VERSION)) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 2;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
		break;

	default:
		/* Also guards the opfcs[type] index used below. */
		err = -EOPNOTSUPP;
		goto free;
	}

	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		goto free;
	}

	/* Reuse the table of this counter type if one is already set up. */
	prio = &dev->flow_db->opfcs[type];
	if (!prio->flow_table) {
		prio = _get_prio(ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
		}
	}

	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst.counter_id = mlx5_fc_id(opfc->fc);

	flow_act.action =
		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	for (i = 0; i < spec_num; i++) {
		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
						    &flow_act, &dst, 1);
		if (IS_ERR(opfc->rule[i])) {
			err = PTR_ERR(opfc->rule[i]);
			goto del_rules;
		}
	}
	/* Each installed rule accounts for one reference on the table. */
	prio->refcount += spec_num;
	kfree(spec);

	return 0;

del_rules:
	/* Slot i holds an ERR_PTR, not a rule; do not leave it behind. */
	opfc->rule[i] = NULL;
	while (--i >= 0) {
		mlx5_del_flow_rules(opfc->rule[i]);
		opfc->rule[i] = NULL;
	}
	/* refcount was not bumped for the rules above, hence ft_added=false. */
	put_flow_table(dev, prio, false);
free:
	kfree(spec);
	return err;
}

void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_op_fc *opfc,
			     enum mlx5_ib_optional_counter_type type)
{
	int i;

	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
		mlx5_del_flow_rules(opfc->rule[i]);
		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
	}
}

static void set_underlay_qp(struct mlx5_ib_dev *dev,
			    struct mlx5_flow_spec *spec,
			    u32 underlay_qpn)
+24 −0
Original line number Diff line number Diff line
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
	struct mlx5_core_dev *mdev;
};

/*
 * Optional flow counter types.  The value is used as the index into
 * mlx5_ib_flow_db.opfcs[] and selects the match spec built by
 * mlx5_ib_fs_add_op_fc().
 */
enum mlx5_ib_optional_counter_type {
	/* RX packets with the ECN field set to CE (IPv4 and IPv6 rules). */
	MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
	/* Packets with BTH opcode CNP, RDMA RX counters namespace. */
	MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
	/* Packets with BTH opcode CNP, RDMA TX counters namespace. */
	MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,

	/* Number of types above; sizes mlx5_ib_flow_db.opfcs[]. */
	MLX5_IB_OPCOUNTER_MAX,
};

struct mlx5_ib_flow_db {
	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];
	struct mlx5_ib_flow_prio	egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
	struct mlx5_ib_flow_prio	fdb;
	struct mlx5_ib_flow_prio	rdma_rx[MLX5_IB_NUM_FLOW_FT];
	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
	struct mlx5_flow_table		*lag_demux_ft;
	/* Protect flow steering bypass flow tables
	 * when add/del flow rules.
@@ -797,6 +806,13 @@ struct mlx5_ib_resources {
	struct mlx5_ib_port_resources ports[2];
};

/*
 * Upper bound on the number of flow rules one optional counter installs.
 * The CE counter needs two (one per IP version); the CNP counters need one.
 */
#define MAX_OPFC_RULES 2

/* State of one optional flow counter. */
struct mlx5_ib_op_fc {
	/* Flow counter; its id is the destination of every rule below. */
	struct mlx5_fc *fc;
	/*
	 * Handles returned by mlx5_add_flow_rules().  Filled from index 0;
	 * mlx5_ib_fs_remove_op_fc() stops at the first NULL entry.
	 */
	struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
};

struct mlx5_ib_counters {
	struct rdma_stat_desc *descs;
	size_t *offsets;
@@ -807,6 +823,14 @@ struct mlx5_ib_counters {
	u16 set_id;
};

/*
 * Install the flow steering rules that feed optional counter @opfc of the
 * given @type, restricted to @port_num when multiport is enabled.
 * Returns 0 on success or a negative errno (-ENOMEM, -EOPNOTSUPP, or the
 * error reported while setting up the flow table or adding a rule).
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type);

/*
 * Delete the flow steering rules recorded in @opfc->rule[] and release the
 * per-rule reference on the flow table belonging to @type.
 */
void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_op_fc *opfc,
			     enum mlx5_ib_optional_counter_type type);

struct mlx5_ib_multiport_info;

struct mlx5_ib_multiport {
+1 −0
Original line number Diff line number Diff line
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT	23
#define IB_BTH_TVER_MASK	0xf
#define IB_BTH_TVER_SHIFT	16
/* BTH opcode value the mlx5 CNP optional-counter rules match on. */
#define IB_BTH_OPCODE_CNP	0x81

static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{