Commit 53110c67 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'fdb-entries-on-dsa-lag-interfaces'

Vladimir Oltean says:

====================
FDB entries on DSA LAG interfaces

This work permits having static and local FDB entries on LAG interfaces
that are offloaded by DSA ports. New API needs to be introduced in
drivers. To maintain consistency with the bridging offload code, I've
taken the liberty to reorganize the data structures added by Tobias in
the DSA core a little bit.

Tested on NXP LS1028A (felix switch). Would appreciate feedback/testing
on other platforms too. Testing procedure was the one described here:
https://patchwork.kernel.org/project/netdevbpf/cover/20210205130240.4072854-1-vladimir.oltean@nxp.com/

with this script:

ip link del bond0
ip link add bond0 type bond mode 802.3ad
ip link set swp1 down && ip link set swp1 master bond0 && ip link set swp1 up
ip link set swp2 down && ip link set swp2 master bond0 && ip link set swp2 up
ip link del br0
ip link add br0 type bridge && ip link set br0 up
ip link set br0 arp off
ip link set bond0 master br0 && ip link set bond0 up
ip link set swp0 master br0 && ip link set swp0 up
ip link set dev bond0 type bridge_slave flood off learning off
bridge fdb add dev bond0 <mac address of other eno0> master static

I'm noticing a problem in 'bridge fdb dump' with the 'self' entries, and
I didn't solve this. On Ocelot, an entry learned on a LAG is reported as
being on the first member port of it (so instead of saying 'self bond0',
it says 'self swp1'). This is better than not seeing the entry at all,
but when DSA queries for the FDBs on a port via ds->ops->port_fdb_dump,
it never queries for FDBs on a LAG. Not clear what we should do there,
we aren't in control of the ->ndo_fdb_dump of the bonding/team drivers.
Alternatively, we could just consider the 'self' entries reported via
ndo_fdb_dump as "better than nothing", and concentrate on the 'master'
entries that are in sync with the bridge when packets are flooded to
software.
====================

Link: https://lore.kernel.org/r/20220223140054.3379617-1-vladimir.oltean@nxp.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 89183b6e 961d8b69
Loading
Loading
Loading
Loading
+24 −22
Original line number Diff line number Diff line
@@ -1625,15 +1625,16 @@ static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port)

		ds = dsa_switch_find(dst->index, dev);
		dp = ds ? dsa_to_port(ds, port) : NULL;
		if (dp && dp->lag_dev) {
		if (dp && dp->lag) {
			/* As the PVT is used to limit flooding of
			 * FORWARD frames, which use the LAG ID as the
			 * source port, we must translate dev/port to
			 * the special "LAG device" in the PVT, using
			 * the LAG ID as the port number.
			 * the LAG ID (one-based) as the port number
			 * (zero-based).
			 */
			dev = MV88E6XXX_G2_PVT_ADDR_DEV_TRUNK;
			port = dsa_lag_id(dst, dp->lag_dev);
			port = dsa_port_lag_id_get(dp) - 1;
		}
	}

@@ -1671,7 +1672,7 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
	struct mv88e6xxx_chip *chip = ds->priv;
	int err;

	if (dsa_to_port(ds, port)->lag_dev)
	if (dsa_to_port(ds, port)->lag)
		/* Hardware is incapable of fast-aging a LAG through a
		 * regular ATU move operation. Until we have something
		 * more fancy in place this is a no-op.
@@ -6175,21 +6176,20 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
}

static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds,
				      struct net_device *lag,
				      struct dsa_lag lag,
				      struct netdev_lag_upper_info *info)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	struct dsa_port *dp;
	int id, members = 0;
	int members = 0;

	if (!mv88e6xxx_has_lag(chip))
		return false;

	id = dsa_lag_id(ds->dst, lag);
	if (id < 0 || id >= ds->num_lag_ids)
	if (!lag.id)
		return false;

	dsa_lag_foreach_port(dp, ds->dst, lag)
	dsa_lag_foreach_port(dp, ds->dst, &lag)
		/* Includes the port joining the LAG */
		members++;

@@ -6209,20 +6209,21 @@ static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds,
	return true;
}

static int mv88e6xxx_lag_sync_map(struct dsa_switch *ds, struct net_device *lag)
static int mv88e6xxx_lag_sync_map(struct dsa_switch *ds, struct dsa_lag lag)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	struct dsa_port *dp;
	u16 map = 0;
	int id;

	id = dsa_lag_id(ds->dst, lag);
	/* DSA LAG IDs are one-based, hardware is zero-based */
	id = lag.id - 1;

	/* Build the map of all ports to distribute flows destined for
	 * this LAG. This can be either a local user port, or a DSA
	 * port if the LAG port is on a remote chip.
	 */
	dsa_lag_foreach_port(dp, ds->dst, lag)
	dsa_lag_foreach_port(dp, ds->dst, &lag)
		map |= BIT(dsa_towards_port(ds, dp->ds->index, dp->index));

	return mv88e6xxx_g2_trunk_mapping_write(chip, id, map);
@@ -6267,8 +6268,8 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	unsigned int id, num_tx;
	struct net_device *lag;
	struct dsa_port *dp;
	struct dsa_lag *lag;
	int i, err, nth;
	u16 mask[8];
	u16 ivec;
@@ -6277,8 +6278,8 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds)
	ivec = BIT(mv88e6xxx_num_ports(chip)) - 1;

	/* Disable all masks for ports that _are_ members of a LAG. */
	list_for_each_entry(dp, &ds->dst->ports, list) {
		if (!dp->lag_dev || dp->ds != ds)
	dsa_switch_for_each_port(dp, ds) {
		if (!dp->lag)
			continue;

		ivec &= ~BIT(dp->index);
@@ -6291,7 +6292,7 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds)
	 * are in the Tx set.
	 */
	dsa_lags_foreach_id(id, ds->dst) {
		lag = dsa_lag_dev(ds->dst, id);
		lag = dsa_lag_by_id(ds->dst, id);
		if (!lag)
			continue;

@@ -6327,7 +6328,7 @@ static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds)
}

static int mv88e6xxx_lag_sync_masks_map(struct dsa_switch *ds,
					struct net_device *lag)
					struct dsa_lag lag)
{
	int err;

@@ -6351,7 +6352,7 @@ static int mv88e6xxx_port_lag_change(struct dsa_switch *ds, int port)
}

static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port,
				   struct net_device *lag,
				   struct dsa_lag lag,
				   struct netdev_lag_upper_info *info)
{
	struct mv88e6xxx_chip *chip = ds->priv;
@@ -6360,7 +6361,8 @@ static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port,
	if (!mv88e6xxx_lag_can_offload(ds, lag, info))
		return -EOPNOTSUPP;

	id = dsa_lag_id(ds->dst, lag);
	/* DSA LAG IDs are one-based */
	id = lag.id - 1;

	mv88e6xxx_reg_lock(chip);

@@ -6383,7 +6385,7 @@ static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port,
}

static int mv88e6xxx_port_lag_leave(struct dsa_switch *ds, int port,
				    struct net_device *lag)
				    struct dsa_lag lag)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	int err_sync, err_trunk;
@@ -6408,7 +6410,7 @@ static int mv88e6xxx_crosschip_lag_change(struct dsa_switch *ds, int sw_index,
}

static int mv88e6xxx_crosschip_lag_join(struct dsa_switch *ds, int sw_index,
					int port, struct net_device *lag,
					int port, struct dsa_lag lag,
					struct netdev_lag_upper_info *info)
{
	struct mv88e6xxx_chip *chip = ds->priv;
@@ -6431,7 +6433,7 @@ static int mv88e6xxx_crosschip_lag_join(struct dsa_switch *ds, int sw_index,
}

static int mv88e6xxx_crosschip_lag_leave(struct dsa_switch *ds, int sw_index,
					 int port, struct net_device *lag)
					 int port, struct dsa_lag lag)
{
	struct mv88e6xxx_chip *chip = ds->priv;
	int err_sync, err_pvt;
+22 −4
Original line number Diff line number Diff line
@@ -614,6 +614,22 @@ static int felix_fdb_del(struct dsa_switch *ds, int port,
	return ocelot_fdb_del(ocelot, port, addr, vid);
}

static int felix_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag lag,
			     const unsigned char *addr, u16 vid)
{
	struct ocelot *ocelot = ds->priv;

	return ocelot_lag_fdb_add(ocelot, lag.dev, addr, vid);
}

static int felix_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag lag,
			     const unsigned char *addr, u16 vid)
{
	struct ocelot *ocelot = ds->priv;

	return ocelot_lag_fdb_del(ocelot, lag.dev, addr, vid);
}

static int felix_mdb_add(struct dsa_switch *ds, int port,
			 const struct switchdev_obj_port_mdb *mdb)
{
@@ -677,20 +693,20 @@ static void felix_bridge_leave(struct dsa_switch *ds, int port,
}

static int felix_lag_join(struct dsa_switch *ds, int port,
			  struct net_device *bond,
			  struct dsa_lag lag,
			  struct netdev_lag_upper_info *info)
{
	struct ocelot *ocelot = ds->priv;

	return ocelot_port_lag_join(ocelot, port, bond, info);
	return ocelot_port_lag_join(ocelot, port, lag.dev, info);
}

static int felix_lag_leave(struct dsa_switch *ds, int port,
			   struct net_device *bond)
			   struct dsa_lag lag)
{
	struct ocelot *ocelot = ds->priv;

	ocelot_port_lag_leave(ocelot, port, bond);
	ocelot_port_lag_leave(ocelot, port, lag.dev);

	return 0;
}
@@ -1579,6 +1595,8 @@ const struct dsa_switch_ops felix_switch_ops = {
	.port_fdb_dump			= felix_fdb_dump,
	.port_fdb_add			= felix_fdb_add,
	.port_fdb_del			= felix_fdb_del,
	.lag_fdb_add			= felix_lag_fdb_add,
	.lag_fdb_del			= felix_lag_fdb_del,
	.port_mdb_add			= felix_mdb_add,
	.port_mdb_del			= felix_mdb_del,
	.port_pre_bridge_flags		= felix_pre_bridge_flags,
+14 −18
Original line number Diff line number Diff line
@@ -2646,18 +2646,16 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port,
}

static bool
qca8k_lag_can_offload(struct dsa_switch *ds,
		      struct net_device *lag,
qca8k_lag_can_offload(struct dsa_switch *ds, struct dsa_lag lag,
		      struct netdev_lag_upper_info *info)
{
	struct dsa_port *dp;
	int id, members = 0;
	int members = 0;

	id = dsa_lag_id(ds->dst, lag);
	if (id < 0 || id >= ds->num_lag_ids)
	if (!lag.id)
		return false;

	dsa_lag_foreach_port(dp, ds->dst, lag)
	dsa_lag_foreach_port(dp, ds->dst, &lag)
		/* Includes the port joining the LAG */
		members++;

@@ -2675,16 +2673,14 @@ qca8k_lag_can_offload(struct dsa_switch *ds,
}

static int
qca8k_lag_setup_hash(struct dsa_switch *ds,
		     struct net_device *lag,
qca8k_lag_setup_hash(struct dsa_switch *ds, struct dsa_lag lag,
		     struct netdev_lag_upper_info *info)
{
	struct net_device *lag_dev = lag.dev;
	struct qca8k_priv *priv = ds->priv;
	bool unique_lag = true;
	unsigned int i;
	u32 hash = 0;
	int i, id;

	id = dsa_lag_id(ds->dst, lag);

	switch (info->hash_type) {
	case NETDEV_LAG_HASH_L23:
@@ -2701,7 +2697,7 @@ qca8k_lag_setup_hash(struct dsa_switch *ds,

	/* Check if we are the unique configured LAG */
	dsa_lags_foreach_id(i, ds->dst)
		if (i != id && dsa_lag_dev(ds->dst, i)) {
		if (i != lag.id && dsa_lag_by_id(ds->dst, i)) {
			unique_lag = false;
			break;
		}
@@ -2716,7 +2712,7 @@ qca8k_lag_setup_hash(struct dsa_switch *ds,
	if (unique_lag) {
		priv->lag_hash_mode = hash;
	} else if (priv->lag_hash_mode != hash) {
		netdev_err(lag, "Error: Mismatched Hash Mode across different lag is not supported\n");
		netdev_err(lag_dev, "Error: Mismatched Hash Mode across different lag is not supported\n");
		return -EOPNOTSUPP;
	}

@@ -2726,13 +2722,14 @@ qca8k_lag_setup_hash(struct dsa_switch *ds,

static int
qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port,
			  struct net_device *lag, bool delete)
			  struct dsa_lag lag, bool delete)
{
	struct qca8k_priv *priv = ds->priv;
	int ret, id, i;
	u32 val;

	id = dsa_lag_id(ds->dst, lag);
	/* DSA LAG IDs are one-based, hardware is zero-based */
	id = lag.id - 1;

	/* Read current port member */
	ret = regmap_read(priv->regmap, QCA8K_REG_GOL_TRUNK_CTRL0, &val);
@@ -2794,8 +2791,7 @@ qca8k_lag_refresh_portmap(struct dsa_switch *ds, int port,
}

static int
qca8k_port_lag_join(struct dsa_switch *ds, int port,
		    struct net_device *lag,
qca8k_port_lag_join(struct dsa_switch *ds, int port, struct dsa_lag lag,
		    struct netdev_lag_upper_info *info)
{
	int ret;
@@ -2812,7 +2808,7 @@ qca8k_port_lag_join(struct dsa_switch *ds, int port,

static int
qca8k_port_lag_leave(struct dsa_switch *ds, int port,
		     struct net_device *lag)
		     struct dsa_lag lag)
{
	return qca8k_lag_refresh_portmap(ds, port, lag, true);
}
+7 −5
Original line number Diff line number Diff line
@@ -419,6 +419,9 @@ static int lan966x_netdevice_event(struct notifier_block *nb,
	return notifier_from_errno(ret);
}

/* We don't offload uppers such as LAG as bridge ports, so every device except
 * the bridge itself is foreign.
 */
static bool lan966x_foreign_dev_check(const struct net_device *dev,
				      const struct net_device *foreign_dev)
{
@@ -426,10 +429,10 @@ static bool lan966x_foreign_dev_check(const struct net_device *dev,
	struct lan966x *lan966x = port->lan966x;

	if (netif_is_bridge_master(foreign_dev))
		if (lan966x->bridge != foreign_dev)
			return true;

		if (lan966x->bridge == foreign_dev)
			return false;

	return true;
}

static int lan966x_switchdev_event(struct notifier_block *nb,
@@ -449,8 +452,7 @@ static int lan966x_switchdev_event(struct notifier_block *nb,
		err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
							   lan966x_netdevice_check,
							   lan966x_foreign_dev_check,
							   lan966x_handle_fdb,
							   NULL);
							   lan966x_handle_fdb);
		return notifier_from_errno(err);
	}

+127 −1
Original line number Diff line number Diff line
@@ -1907,6 +1907,8 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)
	u32 mask = 0;
	int port;

	lockdep_assert_held(&ocelot->fwd_domain_lock);

	for (port = 0; port < ocelot->num_phys_ports; port++) {
		struct ocelot_port *ocelot_port = ocelot->ports[port];

@@ -1920,6 +1922,19 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond)
	return mask;
}

/* The logical port number of a LAG is equal to the lowest numbered physical
 * port ID present in that LAG. It may change if that port ever leaves the LAG.
 */
static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond)
{
	int bond_mask = ocelot_get_bond_mask(ocelot, bond);

	if (!bond_mask)
		return -ENOENT;

	return __ffs(bond_mask);
}

u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port)
{
	struct ocelot_port *ocelot_port = ocelot->ports[src_port];
@@ -2413,7 +2428,7 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)

		bond = ocelot_port->bond;
		if (bond) {
			int lag = __ffs(ocelot_get_bond_mask(ocelot, bond));
			int lag = ocelot_bond_get_id(ocelot, bond);

			ocelot_rmw_gix(ocelot,
				       ANA_PORT_PORT_CFG_PORTID_VAL(lag),
@@ -2428,6 +2443,46 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
	}
}

/* Documentation for PORTID_VAL says:
 *     Logical port number for front port. If port is not a member of a LLAG,
 *     then PORTID must be set to the physical port number.
 *     If port is a member of a LLAG, then PORTID must be set to the common
 *     PORTID_VAL used for all member ports of the LLAG.
 *     The value must not exceed the number of physical ports on the device.
 *
 * This means we have little choice but to migrate FDB entries pointing towards
 * a logical port when that changes.
 */
static void ocelot_migrate_lag_fdbs(struct ocelot *ocelot,
				    struct net_device *bond,
				    int lag)
{
	struct ocelot_lag_fdb *fdb;
	int err;

	lockdep_assert_held(&ocelot->fwd_domain_lock);

	list_for_each_entry(fdb, &ocelot->lag_fdbs, list) {
		if (fdb->bond != bond)
			continue;

		err = ocelot_mact_forget(ocelot, fdb->addr, fdb->vid);
		if (err) {
			dev_err(ocelot->dev,
				"failed to delete LAG %s FDB %pM vid %d: %pe\n",
				bond->name, fdb->addr, fdb->vid, ERR_PTR(err));
		}

		err = ocelot_mact_learn(ocelot, lag, fdb->addr, fdb->vid,
					ENTRYTYPE_LOCKED);
		if (err) {
			dev_err(ocelot->dev,
				"failed to migrate LAG %s FDB %pM vid %d: %pe\n",
				bond->name, fdb->addr, fdb->vid, ERR_PTR(err));
		}
	}
}

int ocelot_port_lag_join(struct ocelot *ocelot, int port,
			 struct net_device *bond,
			 struct netdev_lag_upper_info *info)
@@ -2452,14 +2507,23 @@ EXPORT_SYMBOL(ocelot_port_lag_join);
void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
			   struct net_device *bond)
{
	int old_lag_id, new_lag_id;

	mutex_lock(&ocelot->fwd_domain_lock);

	old_lag_id = ocelot_bond_get_id(ocelot, bond);

	ocelot->ports[port]->bond = NULL;

	ocelot_setup_logical_port_ids(ocelot);
	ocelot_apply_bridge_fwd_mask(ocelot, false);
	ocelot_set_aggr_pgids(ocelot);

	new_lag_id = ocelot_bond_get_id(ocelot, bond);

	if (new_lag_id >= 0 && old_lag_id != new_lag_id)
		ocelot_migrate_lag_fdbs(ocelot, bond, new_lag_id);

	mutex_unlock(&ocelot->fwd_domain_lock);
}
EXPORT_SYMBOL(ocelot_port_lag_leave);
@@ -2468,13 +2532,74 @@ void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active)
{
	struct ocelot_port *ocelot_port = ocelot->ports[port];

	mutex_lock(&ocelot->fwd_domain_lock);

	ocelot_port->lag_tx_active = lag_tx_active;

	/* Rebalance the LAGs */
	ocelot_set_aggr_pgids(ocelot);

	mutex_unlock(&ocelot->fwd_domain_lock);
}
EXPORT_SYMBOL(ocelot_port_lag_change);

int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond,
		       const unsigned char *addr, u16 vid)
{
	struct ocelot_lag_fdb *fdb;
	int lag, err;

	fdb = kzalloc(sizeof(*fdb), GFP_KERNEL);
	if (!fdb)
		return -ENOMEM;

	ether_addr_copy(fdb->addr, addr);
	fdb->vid = vid;
	fdb->bond = bond;

	mutex_lock(&ocelot->fwd_domain_lock);
	lag = ocelot_bond_get_id(ocelot, bond);

	err = ocelot_mact_learn(ocelot, lag, addr, vid, ENTRYTYPE_LOCKED);
	if (err) {
		mutex_unlock(&ocelot->fwd_domain_lock);
		kfree(fdb);
		return err;
	}

	list_add_tail(&fdb->list, &ocelot->lag_fdbs);
	mutex_unlock(&ocelot->fwd_domain_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(ocelot_lag_fdb_add);

int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond,
		       const unsigned char *addr, u16 vid)
{
	struct ocelot_lag_fdb *fdb, *tmp;

	mutex_lock(&ocelot->fwd_domain_lock);

	list_for_each_entry_safe(fdb, tmp, &ocelot->lag_fdbs, list) {
		if (!ether_addr_equal(fdb->addr, addr) || fdb->vid != vid ||
		    fdb->bond != bond)
			continue;

		ocelot_mact_forget(ocelot, addr, vid);
		list_del(&fdb->list);
		mutex_unlock(&ocelot->fwd_domain_lock);
		kfree(fdb);

		return 0;
	}

	mutex_unlock(&ocelot->fwd_domain_lock);

	return -ENOENT;
}
EXPORT_SYMBOL_GPL(ocelot_lag_fdb_del);

/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
 * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
 * In the special case that it's the NPI port that we're configuring, the
@@ -2769,6 +2894,7 @@ int ocelot_init(struct ocelot *ocelot)
	INIT_LIST_HEAD(&ocelot->multicast);
	INIT_LIST_HEAD(&ocelot->pgids);
	INIT_LIST_HEAD(&ocelot->vlans);
	INIT_LIST_HEAD(&ocelot->lag_fdbs);
	ocelot_detect_features(ocelot);
	ocelot_mact_init(ocelot);
	ocelot_vlan_init(ocelot);
Loading