Commit 272ecfc9 authored by Vlad Buslov's avatar Vlad Buslov Committed by Saeed Mahameed
Browse files

net/mlx5: Bridge, add per-port multicast replication tables



Multicast replication requires adding one more level of FDB_BR_OFFLOAD
priority flow tables. The new level is used for per-port multicast-specific
tables that have following flow groups structure (flow highest to lowest
priority):

- Flow group of size one that matches on source port metadata. This will
have a static single rule that prevent packets from being replicated to
their source port.

- Flow group of size one that matches all packets and forwards them to the
port that owns the table.

Initialize the table dynamically on all bridge ports when adding a port to
the bridge that has multicast enabled and on all existing bridge ports when
receiving multicast enable notification.

Signed-off-by: default avatarVlad Buslov <vladbu@nvidia.com>
Reviewed-by: default avatarMaor Dickman <maord@nvidia.com>
Reviewed-by: default avatarRoi Dayan <roid@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 18c2916c
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -61,7 +61,7 @@ mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
	return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
}

static struct mlx5_flow_table *
struct mlx5_flow_table *
mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
{
	struct mlx5_flow_table_attr ft_attr = {};
@@ -1506,6 +1506,15 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
	port->bridge = bridge;
	port->flags |= flags;
	xa_init(&port->vlans);

	err = mlx5_esw_bridge_port_mcast_init(port);
	if (err) {
		esw_warn(esw->dev,
			 "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
			 port->vport_num, port->esw_owner_vhca_id, err);
		goto err_port_mcast;
	}

	err = mlx5_esw_bridge_port_insert(port, br_offloads);
	if (err) {
		esw_warn(esw->dev,
@@ -1518,6 +1527,8 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
	return 0;

err_port_insert:
	mlx5_esw_bridge_port_mcast_cleanup(port);
err_port_mcast:
	kvfree(port);
	return err;
}
@@ -1535,6 +1546,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off

	trace_mlx5_esw_bridge_vport_cleanup(port);
	mlx5_esw_bridge_port_vlans_flush(port, bridge);
	mlx5_esw_bridge_port_mcast_cleanup(port);
	mlx5_esw_bridge_port_erase(port, br_offloads);
	kvfree(port);
	mlx5_esw_bridge_put(br_offloads, bridge);
+328 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "lib/devcom.h"
#include "bridge.h"
#include "eswitch.h"
#include "bridge_priv.h"

static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port,
					       struct mlx5_esw_bridge *bridge)
{
	struct mlx5_eswitch *esw = bridge->br_offloads->esw;
	struct mlx5_flow_table *mcast_ft;

	mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE,
						MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
						esw);
	if (IS_ERR(mcast_ft))
		return PTR_ERR(mcast_ft);

	port->mcast.ft = mcast_ft;
	return 0;
}

static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port)
{
	if (port->mcast.ft)
		mlx5_destroy_flow_table(port->mcast.ft);
	port->mcast.ft = NULL;
}

static struct mlx5_flow_group *
mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw,
				       struct mlx5_flow_table *mcast_ft)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_group *fg;
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());

	MLX5_SET(create_flow_group_in, in, start_flow_index,
		 MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM);
	MLX5_SET(create_flow_group_in, in, end_flow_index,
		 MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO);

	fg = mlx5_create_flow_group(mcast_ft, in);
	kvfree(in);
	if (IS_ERR(fg))
		esw_warn(esw->dev,
			 "Failed to create filter flow group for bridge mcast table (err=%pe)\n",
			 fg);

	return fg;
}

static struct mlx5_flow_group *
mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw,
				    struct mlx5_flow_table *mcast_ft)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_group *fg;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);

	MLX5_SET(create_flow_group_in, in, start_flow_index,
		 MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM);
	MLX5_SET(create_flow_group_in, in, end_flow_index,
		 MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO);

	fg = mlx5_create_flow_group(mcast_ft, in);
	kvfree(in);
	if (IS_ERR(fg))
		esw_warn(esw->dev,
			 "Failed to create forward flow group for bridge mcast table (err=%pe)\n",
			 fg);

	return fg;
}

static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port)
{
	struct mlx5_eswitch *esw = port->bridge->br_offloads->esw;
	struct mlx5_flow_table *mcast_ft = port->mcast.ft;
	struct mlx5_flow_group *fwd_fg, *filter_fg;
	int err;

	filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft);
	if (IS_ERR(filter_fg))
		return PTR_ERR(filter_fg);

	fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft);
	if (IS_ERR(fwd_fg)) {
		err = PTR_ERR(fwd_fg);
		goto err_fwd_fg;
	}

	port->mcast.filter_fg = filter_fg;
	port->mcast.fwd_fg = fwd_fg;

	return 0;

err_fwd_fg:
	mlx5_destroy_flow_group(filter_fg);
	return err;
}

static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port)
{
	if (port->mcast.fwd_fg)
		mlx5_destroy_flow_group(port->mcast.fwd_fg);
	port->mcast.fwd_fg = NULL;
	if (port->mcast.filter_fg)
		mlx5_destroy_flow_group(port->mcast.filter_fg);
	port->mcast.filter_fg = NULL;
}

static struct mlx5_flow_handle *
mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port,
					   struct mlx5_eswitch *esw)
{
	struct mlx5_flow_act flow_act = {
		.action = MLX5_FLOW_CONTEXT_ACTION_DROP,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return ERR_PTR(-ENOMEM);

	rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;

	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num));

	handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0);

	kvfree(rule_spec);
	return handle;
}

static struct mlx5_flow_handle *
mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port)
{
	return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw);
}

static struct mlx5_flow_handle *
mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
{
	struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
	static struct mlx5_flow_handle *handle;
	struct mlx5_eswitch *peer_esw;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return ERR_PTR(-ENODEV);

	handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);

	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return handle;
}

static struct mlx5_flow_handle *
mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
{
	struct mlx5_flow_act flow_act = {
		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_destination dest = {
		.type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
		.vport.num = port->vport_num,
	};
	struct mlx5_esw_bridge *bridge = port->bridge;
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return ERR_PTR(-ENOMEM);

	if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
	    port->vport_num == MLX5_VPORT_UPLINK)
		rule_spec->flow_context.flow_source =
			MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;

	if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
		dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
		dest.vport.vhca_id = port->esw_owner_vhca_id;
	}
	handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);

	kvfree(rule_spec);
	return handle;
}

static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port)
{
	struct mlx5_flow_handle *filter_handle, *fwd_handle;

	filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ?
		mlx5_esw_bridge_mcast_filter_flow_peer_create(port) :
		mlx5_esw_bridge_mcast_filter_flow_create(port);
	if (IS_ERR(filter_handle))
		return PTR_ERR(filter_handle);

	fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port);
	if (IS_ERR(fwd_handle)) {
		mlx5_del_flow_rules(filter_handle);
		return PTR_ERR(fwd_handle);
	}

	port->mcast.filter_handle = filter_handle;
	port->mcast.fwd_handle = fwd_handle;

	return 0;
}

static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port)
{
	if (port->mcast.fwd_handle)
		mlx5_del_flow_rules(port->mcast.fwd_handle);
	port->mcast.fwd_handle = NULL;
	if (port->mcast.filter_handle)
		mlx5_del_flow_rules(port->mcast.filter_handle);
	port->mcast.filter_handle = NULL;
}

int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port)
{
	struct mlx5_esw_bridge *bridge = port->bridge;
	int err;

	if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
		return 0;

	err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge);
	if (err)
		return err;

	err = mlx5_esw_bridge_port_mcast_fgs_init(port);
	if (err)
		goto err_fgs;

	err = mlx5_esw_bridge_port_mcast_fhs_init(port);
	if (err)
		goto err_fhs;
	return err;

err_fhs:
	mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
err_fgs:
	mlx5_esw_bridge_port_mcast_fts_cleanup(port);
	return err;
}

void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port)
{
	mlx5_esw_bridge_port_mcast_fhs_cleanup(port);
	mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
	mlx5_esw_bridge_port_mcast_fts_cleanup(port);
}

static struct mlx5_flow_group *
mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw,
				       struct mlx5_flow_table *ingress_ft)
@@ -251,6 +524,51 @@ mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_of
	br_offloads->igmp_handle = NULL;
}

static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge)
{
	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
	struct mlx5_esw_bridge_port *port, *failed;
	unsigned long i;
	int err;

	xa_for_each(&br_offloads->ports, i, port) {
		if (port->bridge != bridge)
			continue;

		err = mlx5_esw_bridge_port_mcast_init(port);
		if (err) {
			failed = port;
			goto err_port;
		}
	}
	return 0;

err_port:
	xa_for_each(&br_offloads->ports, i, port) {
		if (port == failed)
			break;
		if (port->bridge != bridge)
			continue;

		mlx5_esw_bridge_port_mcast_cleanup(port);
	}
	return err;
}

static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge)
{
	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
	struct mlx5_esw_bridge_port *port;
	unsigned long i;

	xa_for_each(&br_offloads->ports, i, port) {
		if (port->bridge != bridge)
			continue;

		mlx5_esw_bridge_port_mcast_cleanup(port);
	}
}

static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads)
{
	int err;
@@ -306,11 +624,20 @@ int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge)
		return err;

	bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG;
	return 0;

	err = mlx5_esw_brige_mcast_init(bridge);
	if (err) {
		esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n",
			 err);
		bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
		mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
	}
	return err;
}

void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge)
{
	mlx5_esw_brige_mcast_cleanup(bridge);
	bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
	mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
}
+28 −0
Original line number Diff line number Diff line
@@ -81,9 +81,23 @@ static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288);

#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0

#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1
#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1
#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0
#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO		\
	(MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1)
#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM		\
	(MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1)
#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO			\
	(MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM +			\
	 MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1)

#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE			\
	(MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1)
enum {
	MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
	MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
	MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
	MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
};

@@ -138,6 +152,14 @@ struct mlx5_esw_bridge_port {
	u16 flags;
	struct mlx5_esw_bridge *bridge;
	struct xarray vlans;
	struct {
		struct mlx5_flow_table *ft;
		struct mlx5_flow_group *filter_fg;
		struct mlx5_flow_group *fwd_fg;

		struct mlx5_flow_handle *filter_handle;
		struct mlx5_flow_handle *fwd_handle;
	} mcast;
};

struct mlx5_esw_bridge {
@@ -161,6 +183,12 @@ struct mlx5_esw_bridge {
	u16 vlan_proto;
};

struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level,
						     struct mlx5_eswitch *esw);

int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port);
void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port);

int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge);
void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge);

+1 −1
Original line number Diff line number Diff line
@@ -2997,7 +2997,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
		goto out_err;
	}

	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4);
	if (IS_ERR(maj_prio)) {
		err = PTR_ERR(maj_prio);
		goto out_err;