Commit c692a0be authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bridge-dsa-sandwiched-LAG'



Vladimir Oltean says:

====================
Better support for sandwiched LAGs with bridge and DSA

Changes in v4:
- Added missing EXPORT_SYMBOL_GPL
- Using READ_ONCE(fdb->dst)
- Split patches into (a) adding the bridge helpers (b) making DSA use them
- br_mdb_replay went back to the v1 approach where it allocated memory
  in atomic context
- Created a br_switchdev_mdb_populate which reduces some of the code
  duplication
- Fixed the error message in dsa_port_clear_brport_flags
- Replaced "dsa_port_vlan_filtering(dp, br, extack)" with
  "dsa_port_vlan_filtering(dp, br_vlan_enabled(br), extack)" (duh)
- Added review tags (sorry if I missed any)

The objective of this series is to make LAG uppers on top of switchdev
ports work regardless of which order we link interfaces to their masters
(first make the port join the LAG, then the LAG join the bridge, or the
other way around).

There was a design decision to be made in patches 2-4 on whether we
should adopt the "push" model (which attempts to solve the problem
centrally, in the bridge layer) where the driver just calls:

  switchdev_bridge_port_offloaded(brport_dev,
                                  &atomic_notifier_block,
                                  &blocking_notifier_block,
                                  extack);

and the bridge just replays the entire collection of switchdev port
attributes and objects that it has, in some predefined order and with
some predefined error handling logic;

or the "pull" model (which attempts to solve the problem by giving the
driver the rope to hang itself), where the driver, apart from calling:

  switchdev_bridge_port_offloaded(brport_dev, extack);

has the task of "dumpster diving" (as Tobias puts it) through the bridge
attributes and objects by itself, by calling:

  - br_vlan_replay
  - br_fdb_replay
  - br_mdb_replay
  - br_vlan_enabled
  - br_port_flag_is_set
  - br_port_get_stp_state
  - br_multicast_router
  - br_get_ageing_time

(not necessarily all of them, and not necessarily in this order, and
with driver-defined error handling).

Even though I'm not in love myself with the "pull" model, I chose it
because there is a fundamental trick with replaying switchdev events
like this:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set bond0 master br0
ip link set swp0 master bond0 <- this will replay the objects once for
                                 the bond0 bridge port, and the swp0
                                 switchdev port will process them
ip link set swp1 master bond0 <- this will replay the objects again for
                                 the bond0 bridge port, and the swp1
                                 switchdev port will see them, but swp0
                                 will see them for the second time now

Basically I believe that it is implementation defined whether the driver
wants to error out on switchdev objects seen twice on a port, and the
bridge should not enforce a certain model for that. For example, for FDB
entries added to a bonding interface, the underling switchdev driver
might have an abstraction for just that: an FDB entry pointing towards a
logical (as opposed to physical) port. So when the second port joins the
bridge, it doesn't realy need to replay FDB entries, since there is
already at least one hardware port which has been receiving those
events, and the FDB entries don't need to be added a second time to the
same logical port.
In the other corner, we have the drivers that handle switchdev port
attributes on a LAG as individual switchdev port attributes on physical
ports (example: VLAN filtering). In fact, the switchdev_handle_port_attr_set
helper facilitates this: it is a fan-out from a single orig_dev towards
multiple lowers that pass the check_cb().
But that's the point: switchdev_handle_port_attr_set is just a helper
which the driver _opts_ to use. The bridge can't enforce the "push"
model, because that would assume that all drivers handle port attributes
in the same way, which is probably false.

For this reason, I preferred to go with the "pull" mode for this patch
set. Just to see how bad it is for other switchdev drivers to copy-paste
this logic, I added the pull support to ocelot too, and I think it's
pretty manageable.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 65d2dbb3 e4bd44e8
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -719,7 +719,9 @@ static int felix_bridge_join(struct dsa_switch *ds, int port,
{
	struct ocelot *ocelot = ds->priv;

	return ocelot_port_bridge_join(ocelot, port, br);
	ocelot_port_bridge_join(ocelot, port, br);

	return 0;
}

static void felix_bridge_leave(struct dsa_switch *ds, int port,
+2 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ config NET_VENDOR_MICROSEMI

if NET_VENDOR_MICROSEMI

# Users should depend on NET_SWITCHDEV, HAS_IOMEM
# Users should depend on NET_SWITCHDEV, HAS_IOMEM, BRIDGE
config MSCC_OCELOT_SWITCH_LIB
	select NET_DEVLINK
	select REGMAP_MMIO
@@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB

config MSCC_OCELOT_SWITCH
	tristate "Ocelot switch driver"
	depends on BRIDGE || BRIDGE=n
	depends on NET_SWITCHDEV
	depends on HAS_IOMEM
	depends on OF_NET
+6 −12
Original line number Diff line number Diff line
@@ -1514,34 +1514,28 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
}
EXPORT_SYMBOL(ocelot_port_mdb_del);

int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
			     struct net_device *bridge)
{
	struct ocelot_port *ocelot_port = ocelot->ports[port];

	ocelot_port->bridge = bridge;

	return 0;
	ocelot_apply_bridge_fwd_mask(ocelot);
}
EXPORT_SYMBOL(ocelot_port_bridge_join);

int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
			      struct net_device *bridge)
{
	struct ocelot_port *ocelot_port = ocelot->ports[port];
	struct ocelot_vlan pvid = {0}, native_vlan = {0};
	int ret;

	ocelot_port->bridge = NULL;

	ret = ocelot_port_vlan_filtering(ocelot, port, false);
	if (ret)
		return ret;

	ocelot_port_set_pvid(ocelot, port, pvid);
	ocelot_port_set_native_vlan(ocelot, port, native_vlan);

	return 0;
	ocelot_apply_bridge_fwd_mask(ocelot);
}
EXPORT_SYMBOL(ocelot_port_bridge_leave);

+175 −33
Original line number Diff line number Diff line
@@ -1117,77 +1117,213 @@ static int ocelot_port_obj_del(struct net_device *dev,
	return ret;
}

static int ocelot_netdevice_bridge_join(struct ocelot *ocelot, int port,
					struct net_device *bridge)
static void ocelot_inherit_brport_flags(struct ocelot *ocelot, int port,
					struct net_device *brport_dev)
{
	struct switchdev_brport_flags flags = {0};
	int flag;

	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;

	for_each_set_bit(flag, &flags.mask, 32)
		if (br_port_flag_is_set(brport_dev, BIT(flag)))
			flags.val |= BIT(flag);

	ocelot_port_bridge_flags(ocelot, port, flags);
}

static void ocelot_clear_brport_flags(struct ocelot *ocelot, int port)
{
	struct switchdev_brport_flags flags;
	int err;

	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
	flags.val = flags.mask;
	flags.val = flags.mask & ~BR_LEARNING;

	err = ocelot_port_bridge_join(ocelot, port, bridge);
	ocelot_port_bridge_flags(ocelot, port, flags);
}

static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
				 struct net_device *brport_dev,
				 struct net_device *bridge_dev,
				 struct netlink_ext_ack *extack)
{
	clock_t ageing_time;
	u8 stp_state;
	int err;

	ocelot_inherit_brport_flags(ocelot, port, brport_dev);

	stp_state = br_port_get_stp_state(brport_dev);
	ocelot_bridge_stp_state_set(ocelot, port, stp_state);

	err = ocelot_port_vlan_filtering(ocelot, port,
					 br_vlan_enabled(bridge_dev));
	if (err)
		return err;

	ocelot_port_bridge_flags(ocelot, port, flags);
	ageing_time = br_get_ageing_time(bridge_dev);
	ocelot_port_attr_ageing_set(ocelot, port, ageing_time);

	err = br_mdb_replay(bridge_dev, brport_dev,
			    &ocelot_switchdev_blocking_nb, extack);
	if (err && err != -EOPNOTSUPP)
		return err;

	err = br_fdb_replay(bridge_dev, brport_dev, &ocelot_switchdev_nb);
	if (err)
		return err;

	err = br_vlan_replay(bridge_dev, brport_dev,
			     &ocelot_switchdev_blocking_nb, extack);
	if (err && err != -EOPNOTSUPP)
		return err;

	return 0;
}

static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port)
{
	int err;

	err = ocelot_port_vlan_filtering(ocelot, port, false);
	if (err)
		return err;

	ocelot_clear_brport_flags(ocelot, port);

	ocelot_bridge_stp_state_set(ocelot, port, BR_STATE_FORWARDING);

	return 0;
}

static int ocelot_netdevice_bridge_leave(struct ocelot *ocelot, int port,
static int ocelot_netdevice_bridge_join(struct net_device *dev,
					struct net_device *brport_dev,
					struct net_device *bridge,
					struct netlink_ext_ack *extack)
{
	struct ocelot_port_private *priv = netdev_priv(dev);
	struct ocelot_port *ocelot_port = &priv->port;
	struct ocelot *ocelot = ocelot_port->ocelot;
	int port = priv->chip_port;
	int err;

	ocelot_port_bridge_join(ocelot, port, bridge);

	err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack);
	if (err)
		goto err_switchdev_sync;

	return 0;

err_switchdev_sync:
	ocelot_port_bridge_leave(ocelot, port, bridge);
	return err;
}

static int ocelot_netdevice_bridge_leave(struct net_device *dev,
					 struct net_device *brport_dev,
					 struct net_device *bridge)
{
	struct switchdev_brport_flags flags;
	struct ocelot_port_private *priv = netdev_priv(dev);
	struct ocelot_port *ocelot_port = &priv->port;
	struct ocelot *ocelot = ocelot_port->ocelot;
	int port = priv->chip_port;
	int err;

	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
	flags.val = flags.mask & ~BR_LEARNING;
	err = ocelot_switchdev_unsync(ocelot, port);
	if (err)
		return err;

	err = ocelot_port_bridge_leave(ocelot, port, bridge);
	ocelot_port_bridge_leave(ocelot, port, bridge);

	ocelot_port_bridge_flags(ocelot, port, flags);
	return 0;
}

static int ocelot_netdevice_lag_join(struct net_device *dev,
				     struct net_device *bond,
				     struct netdev_lag_upper_info *info,
				     struct netlink_ext_ack *extack)
{
	struct ocelot_port_private *priv = netdev_priv(dev);
	struct ocelot_port *ocelot_port = &priv->port;
	struct ocelot *ocelot = ocelot_port->ocelot;
	struct net_device *bridge_dev;
	int port = priv->chip_port;
	int err;

	err = ocelot_port_lag_join(ocelot, port, bond, info);
	if (err == -EOPNOTSUPP) {
		NL_SET_ERR_MSG_MOD(extack, "Offloading not supported");
		return 0;
	}

	bridge_dev = netdev_master_upper_dev_get(bond);
	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
		return 0;

	err = ocelot_netdevice_bridge_join(dev, bond, bridge_dev, extack);
	if (err)
		goto err_bridge_join;

	return 0;

err_bridge_join:
	ocelot_port_lag_leave(ocelot, port, bond);
	return err;
}

static int ocelot_netdevice_changeupper(struct net_device *dev,
					struct netdev_notifier_changeupper_info *info)
static int ocelot_netdevice_lag_leave(struct net_device *dev,
				      struct net_device *bond)
{
	struct ocelot_port_private *priv = netdev_priv(dev);
	struct ocelot_port *ocelot_port = &priv->port;
	struct ocelot *ocelot = ocelot_port->ocelot;
	struct net_device *bridge_dev;
	int port = priv->chip_port;

	ocelot_port_lag_leave(ocelot, port, bond);

	bridge_dev = netdev_master_upper_dev_get(bond);
	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
		return 0;

	return ocelot_netdevice_bridge_leave(dev, bond, bridge_dev);
}

static int ocelot_netdevice_changeupper(struct net_device *dev,
					struct netdev_notifier_changeupper_info *info)
{
	struct netlink_ext_ack *extack;
	int err = 0;

	extack = netdev_notifier_info_to_extack(&info->info);

	if (netif_is_bridge_master(info->upper_dev)) {
		if (info->linking) {
			err = ocelot_netdevice_bridge_join(ocelot, port,
							   info->upper_dev);
		} else {
			err = ocelot_netdevice_bridge_leave(ocelot, port,
							    info->upper_dev);
		}
	}
	if (netif_is_lag_master(info->upper_dev)) {
		if (info->linking) {
			err = ocelot_port_lag_join(ocelot, port,
		if (info->linking)
			err = ocelot_netdevice_bridge_join(dev, dev,
							   info->upper_dev,
						   info->upper_info);
			if (err == -EOPNOTSUPP) {
				NL_SET_ERR_MSG_MOD(info->info.extack,
						   "Offloading not supported");
				err = 0;
			}
		} else {
			ocelot_port_lag_leave(ocelot, port,
							   extack);
		else
			err = ocelot_netdevice_bridge_leave(dev, dev,
							    info->upper_dev);
	}
	if (netif_is_lag_master(info->upper_dev)) {
		if (info->linking)
			err = ocelot_netdevice_lag_join(dev, info->upper_dev,
							info->upper_info, extack);
		else
			ocelot_netdevice_lag_leave(dev, info->upper_dev);
	}

	return notifier_from_errno(err);
}

/* Treat CHANGEUPPER events on an offloaded LAG as individual CHANGEUPPER
 * events for the lower physical ports of the LAG.
 * If the LAG upper isn't offloaded, ignore its CHANGEUPPER events.
 * In case the LAG joined a bridge, notify that we are offloading it and can do
 * forwarding in hardware towards it.
 */
static int
ocelot_netdevice_lag_changeupper(struct net_device *dev,
				 struct netdev_notifier_changeupper_info *info)
@@ -1197,6 +1333,12 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,
	int err = NOTIFY_DONE;

	netdev_for_each_lower_dev(dev, lower, iter) {
		struct ocelot_port_private *priv = netdev_priv(lower);
		struct ocelot_port *ocelot_port = &priv->port;

		if (ocelot_port->bond != dev)
			return NOTIFY_OK;

		err = ocelot_netdevice_changeupper(lower, info);
		if (err)
			return notifier_from_errno(err);
+40 −0
Original line number Diff line number Diff line
@@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
bool br_multicast_enabled(const struct net_device *dev);
bool br_multicast_router(const struct net_device *dev);
int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
		  struct notifier_block *nb, struct netlink_ext_ack *extack);
#else
static inline int br_multicast_list_adjacent(struct net_device *dev,
					     struct list_head *br_ip_list)
@@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev)
{
	return false;
}
static inline int br_mdb_replay(struct net_device *br_dev,
				struct net_device *dev,
				struct notifier_block *nb,
				struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
#endif

#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
@@ -102,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
int br_vlan_get_info(const struct net_device *dev, u16 vid,
		     struct bridge_vlan_info *p_vinfo);
int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
		   struct notifier_block *nb, struct netlink_ext_ack *extack);
#else
static inline bool br_vlan_enabled(const struct net_device *dev)
{
@@ -128,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
{
	return -EINVAL;
}

static inline int br_vlan_replay(struct net_device *br_dev,
				 struct net_device *dev,
				 struct notifier_block *nb,
				 struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
#endif

#if IS_ENABLED(CONFIG_BRIDGE)
@@ -136,6 +155,10 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
				    __u16 vid);
void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
u8 br_port_get_stp_state(const struct net_device *dev);
clock_t br_get_ageing_time(struct net_device *br_dev);
int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
		  struct notifier_block *nb);
#else
static inline struct net_device *
br_fdb_find_port(const struct net_device *br_dev,
@@ -154,6 +177,23 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
	return false;
}

static inline u8 br_port_get_stp_state(const struct net_device *dev)
{
	return BR_STATE_DISABLED;
}

static inline clock_t br_get_ageing_time(struct net_device *br_dev)
{
	return 0;
}

static inline int br_fdb_replay(struct net_device *br_dev,
				struct net_device *dev,
				struct notifier_block *nb)
{
	return -EOPNOTSUPP;
}
#endif

#endif
Loading