Commit 94db3317 authored by Eli Cohen, committed by Saeed Mahameed
Browse files

net/mlx5: Support multiport eswitch mode



Multiport eswitch mode is a LAG mode that allows adding rules that
forward traffic to a specific physical port without being affected by the
LAG affinity configuration.

This mode of operation is mutually exclusive with the other LAG modes used
by multipath and bonding.

To make the transition between the modes, we maintain a count of the
rules that specify one of the uplink representors as the target of a
mirred egress redirect action.

An example of such a rule would be:

$ tc filter add dev enp8s0f0_0 prot all root flower dst_mac \
  00:11:22:33:44:55 action mirred egress redirect dev enp8s0f0

When the reference count grows to one and LAG is not in use, we
create the LAG in multiport eswitch mode. Other mode changes are not
allowed while in this mode. When the reference count reaches zero, we
destroy the LAG and let other modes be used if needed.

The logic was also changed so that if forwarding to some uplink
destination cannot be guaranteed, we fail the operation so the rule will
eventually be in software and not in hardware.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent a4a9c87e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
					en_rep.o en/rep/bond.o en/mod_hdr.o \
					en/mapping.o
					en/mapping.o lag/mpesw.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
					lib/fs_chains.o en/tc_tun.o \
					esw/indir_table.o en/tc_tun_encap.o \
+14 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include "en/tc_tun_encap.h"
#include "en/tc_priv.h"
#include "en_rep.h"
#include "lag/lag.h"

static bool
same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
@@ -215,6 +216,7 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
	struct net_device *uplink_dev;
	struct mlx5e_priv *out_priv;
	struct mlx5_eswitch *esw;
	bool is_uplink_rep;
	int *ifindexes;
	int if_count;
	int err;
@@ -229,6 +231,10 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,

	parse_state->ifindexes[if_count] = out_dev->ifindex;
	parse_state->if_count++;
	is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
	err = mlx5_lag_do_mirred(priv->mdev, out_dev);
	if (err)
		return err;

	out_dev = get_fdb_out_dev(uplink_dev, out_dev);
	if (!out_dev)
@@ -268,6 +274,14 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
	rpriv = out_priv->ppriv;
	esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
	esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;

	/* If output device is bond master then rules are not explicit
	 * so we don't attempt to count them.
	 */
	if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
	    MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
		attr->lag.count = true;

	esw_attr->out_count++;

	return 0;
+27 −1
Original line number Diff line number Diff line
@@ -1740,6 +1740,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,

	free_flow_post_acts(flow);

	if (flow->attr->lag.count)
		mlx5_lag_del_mpesw_rule(esw->dev);

	kvfree(attr->esw_attr->rx_tun_attr);
	kvfree(attr->parse_attr);
	kfree(flow->attr);
@@ -3788,12 +3791,25 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
		 same_hw_reps(priv, peer_netdev));
}

/* Report whether @out_dev qualifies as a multiport eswitch forward target.
 *
 * All three conditions must hold, checked in this order:
 *   - @out_dev is an uplink representor,
 *   - priv->mdev reports the port_select_flow_table port-selection capability,
 *   - priv->mdev reports the create_lag_when_not_master_up general capability.
 *
 * Returns true only when every condition is met, false otherwise.
 */
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
	/* Only uplink representors are candidates. */
	if (!mlx5e_eswitch_uplink_rep(out_dev))
		return false;

	if (!MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table))
		return false;

	/* Normalize the capability field to a strict boolean. */
	return !!MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up);
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
				    struct net_device *out_dev)
{
	if (is_merged_eswitch_vfs(priv, out_dev))
		return true;

	if (is_multiport_eligible(priv, out_dev))
		return true;

	if (is_lag_dev(priv, out_dev))
		return true;

@@ -4050,6 +4066,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		     struct mlx5_core_dev *in_mdev)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
@@ -4085,17 +4102,26 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
	if (err)
		goto err_free;

	if (flow->attr->lag.count) {
		err = mlx5_lag_add_mpesw_rule(esw->dev);
		if (err)
			goto err_free;
	}

	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
	complete_all(&flow->init_done);
	if (err) {
		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
			goto err_free;
			goto err_lag;

		add_unready_flow(flow);
	}

	return flow;

err_lag:
	if (flow->attr->lag.count)
		mlx5_lag_del_mpesw_rule(esw->dev);
err_free:
	mlx5e_flow_put(priv, flow);
out:
+7 −0
Original line number Diff line number Diff line
@@ -85,6 +85,13 @@ struct mlx5_flow_attr {
	u32 flags;
	struct list_head list;
	struct mlx5e_post_act_handle *post_act_handle;
	struct {
		/* Indicate whether the parsed flow should be counted for lag mode decision
		 * making
		 */
		bool count;
	} lag;
	/* keep this union last */
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
+3 −0
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#include "en_tc.h"
#include "en/mapping.h"
#include "devlink.h"
#include "lag/lag.h"

#define mlx5_esw_for_each_rep(esw, i, rep) \
	xa_for_each(&((esw)->offloads.vport_reps), i, rep)
@@ -418,6 +419,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
		dest[dest_idx].vport.vhca_id =
			MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
		if (mlx5_lag_mpesw_is_activated(esw->dev))
			dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
	}
	if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
		if (pkt_reformat) {
Loading