Commit b57e0d48 authored by Jakub Kicinski
Browse files


Tony Nguyen says:

====================
ice: switchdev bridge offload

Wojciech Drewek says:

Linux bridge provides ability to learn MAC addresses and vlans
detected on bridge's ports. As a result of this, FDB (forwarding database)
entries are created and they can be offloaded to the HW. By adding
VF's port representors to the bridge together with the uplink netdev,
we can learn VF's and link partner's MAC addresses. This is achieved
by slow/exception-path, where packets that do not match any filters
(FDB entries in this case) are sent to the bridge ports.

Driver keeps track of the netdevs added to the bridge
by listening for NETDEV_CHANGEUPPER event. We distinguish two types
of bridge ports: uplink port and VF's representor port. Linux
bridge always learns src MAC of the packet on rx path. With the
current slow-path implementation, it means that we will learn
VF's MAC on port repr (when the VF transmits the packet) and
link partner's MAC on uplink (when we receive it on uplink from LAN).

The driver is notified about learning of the MAC/VLAN by
SWITCHDEV_FDB_{ADD|DEL}_TO_DEVICE events. This is followed by creation
of the HW filter. The direction of the filter is based on port
type (uplink or VF repr). In case of the uplink, rule forwards
the packets to the LAN (matching on link partner's MAC). When the
notification is received on VF repr then the rule forwards the
packets to the associated VF (matching on VF's MAC).

This approach would not work on its own however. This is because if
one of the directions is offloaded, then the bridge would not be able
to learn the other one. If the egress rule is added (learned on uplink)
then the response from the VF will be sent directly to the LAN.
The packet will not go through the slow-path, so it would not be seen on
VF's port repr. Because of that, the bridge would not learn VF's MAC.

This is solved by introducing a guard rule. It prevents the forward rule
from working until the opposite direction is offloaded.

Aging is not fully supported yet, aging time is static for now. The
follow up submissions will introduce counters that will allow us to
keep track if the rule is actually being used or not.

A few fixes/changes are needed for this feature to work with the ice driver.
These are introduced in the first 5 patches.

Reviewed-by: Vlad Buslov <vladbu@nvidia.com>

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  ice: add tracepoints for the switchdev bridge
  ice: implement static version of ageing
  ice: implement bridge port vlan
  ice: Add VLAN FDB support in switchdev mode
  ice: Add guard rule when creating FDB in switchdev
  ice: Switchdev FDB events support
  ice: Implement basic eswitch bridge setup
  ice: Unset src prune on uplink VSI
  ice: Disable vlan pruning for uplink VSI
  ice: Don't tx before switchdev is fully configured
  ice: Prohibit rx mode change in switchdev mode
  ice: Skip adv rules removal upon switchdev release
====================

Link: https://lore.kernel.org/r/20230724161152.2177196-1-anthony.l.nguyen@intel.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 20bf98c9 d129c2a2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -47,5 +47,5 @@ ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o
ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o ice_eswitch_br.o
ice-$(CONFIG_GNSS) += ice_gnss.o
+4 −1
Original line number Diff line number Diff line
@@ -370,6 +370,7 @@ struct ice_vsi {
	u16 rx_buf_len;

	struct ice_aqc_vsi_props info;	 /* VSI properties */
	struct ice_vsi_vlan_info vlan_info;	/* vlan config to be restored */

	/* VSI stats */
	struct rtnl_link_stats64 net_stats;
@@ -517,6 +518,7 @@ enum ice_misc_thread_tasks {
struct ice_switchdev_info {
	struct ice_vsi *control_vsi;
	struct ice_vsi *uplink_vsi;
	struct ice_esw_br_offloads *br_offloads;
	bool is_running;
};

@@ -626,6 +628,7 @@ struct ice_pf {
	struct ice_lag *lag; /* Link Aggregation information */

	struct ice_switchdev_info switchdev;
	struct ice_esw_br_port *br_port;

#define ICE_INVALID_AGG_NODE_ID		0
#define ICE_PF_AGG_NODE_ID_START	1
@@ -853,7 +856,7 @@ static inline bool ice_is_adq_active(struct ice_pf *pf)
	return false;
}

bool netif_is_ice(struct net_device *dev);
bool netif_is_ice(const struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
int ice_vsi_open_ctrl(struct ice_vsi *vsi);
+41 −5
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#include "ice.h"
#include "ice_lib.h"
#include "ice_eswitch.h"
#include "ice_eswitch_br.h"
#include "ice_fltr.h"
#include "ice_repr.h"
#include "ice_devlink.h"
@@ -103,17 +104,28 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
		rule_added = true;
	}

	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
	if (vlan_ops->dis_rx_filtering(uplink_vsi))
		goto err_dis_rx;

	if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
		goto err_override_uplink;

	if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
		goto err_override_control;

	if (ice_vsi_update_local_lb(uplink_vsi, true))
		goto err_override_local_lb;

	return 0;

err_override_local_lb:
	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
err_override_control:
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
err_override_uplink:
	vlan_ops->ena_rx_filtering(uplink_vsi);
err_dis_rx:
	if (rule_added)
		ice_clear_dflt_vsi(uplink_vsi);
err_def_rx:
@@ -306,6 +318,9 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi)
	repr->src_vsi = vsi;
	repr->dst->u.port_info.port_id = vsi->vsi_num;

	if (repr->br_port)
		repr->br_port->vsi = vsi;

	ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
	if (ret) {
		ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI);
@@ -331,6 +346,9 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
	np = netdev_priv(netdev);
	vsi = np->vsi;

	if (!vsi || !ice_is_switchdev_running(vsi->back))
		return NETDEV_TX_BUSY;

	if (ice_is_reset_in_progress(vsi->back->state) ||
	    test_bit(ICE_VF_DIS, vsi->back->state))
		return NETDEV_TX_BUSY;
@@ -378,9 +396,14 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
{
	struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
	struct ice_vsi_vlan_ops *vlan_ops;

	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);

	ice_vsi_update_local_lb(uplink_vsi, false);
	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
	vlan_ops->ena_rx_filtering(uplink_vsi);
	ice_clear_dflt_vsi(uplink_vsi);
	ice_fltr_add_mac_and_broadcast(uplink_vsi,
				       uplink_vsi->port_info->mac.perm_addr,
@@ -455,16 +478,24 @@ static void ice_eswitch_napi_disable(struct ice_pf *pf)
 */
static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
{
	struct ice_vsi *ctrl_vsi;
	struct ice_vsi *ctrl_vsi, *uplink_vsi;

	uplink_vsi = ice_get_main_vsi(pf);
	if (!uplink_vsi)
		return -ENODEV;

	if (netif_is_any_bridge_port(uplink_vsi->netdev)) {
		dev_err(ice_pf_to_dev(pf),
			"Uplink port cannot be a bridge port\n");
		return -EINVAL;
	}

	pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
	if (!pf->switchdev.control_vsi)
		return -ENODEV;

	ctrl_vsi = pf->switchdev.control_vsi;
	pf->switchdev.uplink_vsi = ice_get_main_vsi(pf);
	if (!pf->switchdev.uplink_vsi)
		goto err_vsi;
	pf->switchdev.uplink_vsi = uplink_vsi;

	if (ice_eswitch_setup_env(pf))
		goto err_vsi;
@@ -480,10 +511,15 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
	if (ice_vsi_open(ctrl_vsi))
		goto err_setup_reprs;

	if (ice_eswitch_br_offloads_init(pf))
		goto err_br_offloads;

	ice_eswitch_napi_enable(pf);

	return 0;

err_br_offloads:
	ice_vsi_close(ctrl_vsi);
err_setup_reprs:
	ice_repr_rem_from_all_vfs(pf);
err_repr_add:
@@ -502,8 +538,8 @@ static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;

	ice_eswitch_napi_disable(pf);
	ice_eswitch_br_offloads_deinit(pf);
	ice_eswitch_release_env(pf);
	ice_rem_adv_rule_for_vsi(&pf->hw, ctrl_vsi->idx);
	ice_eswitch_release_reprs(pf, ctrl_vsi);
	ice_vsi_release(ctrl_vsi);
	ice_repr_rem_from_all_vfs(pf);
+1309 −0

File added.

Preview size limit exceeded, changes collapsed.

+120 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2023, Intel Corporation. */

#ifndef _ICE_ESWITCH_BR_H_
#define _ICE_ESWITCH_BR_H_

#include <linux/rhashtable.h>
#include <linux/workqueue.h>

/**
 * struct ice_esw_br_fdb_data - MAC address / VLAN ID pair of an FDB entry
 * @addr: MAC address, ETH_ALEN bytes
 * @vid: VLAN ID
 *
 * Embedded as the first member of struct ice_esw_br_fdb_entry.
 *
 * NOTE(review): presumably the lookup key of the bridge's FDB rhashtable -
 * confirm against the rhashtable params in ice_eswitch_br.c.
 */
struct ice_esw_br_fdb_data {
	unsigned char addr[ETH_ALEN];
	u16 vid;
};

/**
 * struct ice_esw_br_flow - pair of HW rule handles referenced by an FDB entry
 * @fwd_rule: query data of the forward rule
 * @guard_rule: query data of the guard rule
 *
 * NOTE(review): per the description of the series that added this file, the
 * guard rule keeps the forward rule from taking effect until the opposite
 * direction is offloaded - confirm against ice_eswitch_br.c.
 */
struct ice_esw_br_flow {
	struct ice_rule_query_data *fwd_rule;
	struct ice_rule_query_data *guard_rule;
};

/* FDB entry flag bits.
 *
 * NOTE(review): presumably kept in ice_esw_br_fdb_entry::flags; the name
 * suggests it marks entries added by the user rather than learned by the
 * bridge - confirm against ice_eswitch_br.c.
 */
enum {
	ICE_ESWITCH_BR_FDB_ADDED_BY_USER = BIT(0),
};

/**
 * struct ice_esw_br_fdb_entry - one FDB entry tracked by the driver
 * @data: MAC address / VLAN ID pair of the entry
 * @ht_node: rhashtable linkage
 *	     (presumably into ice_esw_br::fdb_ht - TODO confirm)
 * @list: list linkage
 *	  (presumably into ice_esw_br::fdb_list - TODO confirm)
 * @flags: flag bits
 *	   (presumably ICE_ESWITCH_BR_FDB_* values - TODO confirm)
 * @dev: net device associated with the entry
 * @br_port: bridge port associated with the entry
 * @flow: HW rule handles (forward and guard rule) of the entry
 * @last_use: NOTE(review): looks like a timestamp of the last use, consulted
 *	      for ageing - confirm units (jiffies?) in ice_eswitch_br.c
 */
struct ice_esw_br_fdb_entry {
	struct ice_esw_br_fdb_data data;
	struct rhash_head ht_node;
	struct list_head list;

	int flags;

	struct net_device *dev;
	struct ice_esw_br_port *br_port;
	struct ice_esw_br_flow *flow;

	unsigned long last_use;
};

/**
 * enum ice_esw_br_port_type - kind of bridge port tracked by the driver
 * @ICE_ESWITCH_BR_UPLINK_PORT: the uplink port
 * @ICE_ESWITCH_BR_VF_REPR_PORT: a VF port representor
 */
enum ice_esw_br_port_type {
	ICE_ESWITCH_BR_UPLINK_PORT = 0,
	ICE_ESWITCH_BR_VF_REPR_PORT = 1,
};

/**
 * struct ice_esw_br_port - a bridge port tracked by the driver
 * @bridge: bridge this port belongs to
 * @vsi: VSI associated with the port
 * @type: uplink or VF representor port
 * @vsi_idx: VSI index
 *	     (presumably the index of @vsi - TODO confirm)
 * @pvid: port VLAN ID
 *	  (NOTE(review): meaning of 0 is not visible here - confirm)
 * @vlans: xarray of per-port VLAN state
 *	   (presumably struct ice_esw_br_vlan indexed by VID - TODO confirm)
 */
struct ice_esw_br_port {
	struct ice_esw_br *bridge;
	struct ice_vsi *vsi;
	enum ice_esw_br_port_type type;
	u16 vsi_idx;
	u16 pvid;
	struct xarray vlans;
};

/* Bridge flag bits.
 *
 * NOTE(review): presumably kept in ice_esw_br::flags; the name suggests it
 * mirrors the bridge's VLAN filtering setting - confirm against
 * ice_eswitch_br.c.
 */
enum {
	ICE_ESWITCH_BR_VLAN_FILTERING = BIT(0),
};

/**
 * struct ice_esw_br - driver-side state of an offloaded bridge
 * @br_offloads: back pointer to the owning offloads context
 * @ports: xarray of bridge ports
 *	   (presumably struct ice_esw_br_port; index not visible here -
 *	   TODO confirm)
 * @fdb_ht: rhashtable of FDB entries
 * @fdb_list: list of FDB entries
 * @ifindex: interface index
 *	     (presumably of the bridge net device - TODO confirm)
 * @flags: flag bits
 *	   (presumably ICE_ESWITCH_BR_VLAN_FILTERING etc. - TODO confirm)
 * @ageing_time: FDB ageing time
 *		 (NOTE(review): units not visible here - confirm)
 */
struct ice_esw_br {
	struct ice_esw_br_offloads *br_offloads;
	struct xarray ports;

	struct rhashtable fdb_ht;
	struct list_head fdb_list;

	int ifindex;
	u32 flags;
	unsigned long ageing_time;
};

/**
 * struct ice_esw_br_offloads - per-PF bridge offload context
 * @pf: owning PF
 * @bridge: bridge state tracked by this context
 * @netdev_nb: notifier block
 *	       (presumably for netdev events - TODO confirm)
 * @switchdev_blk: notifier block
 *		   (presumably the blocking switchdev notifier - TODO confirm)
 * @switchdev_nb: notifier block
 *		  (presumably the switchdev notifier - TODO confirm)
 * @wq: workqueue
 *	(presumably used for deferred FDB work - TODO confirm)
 * @update_work: delayed work item; ice_work_to_br_offloads() maps its
 *		 embedded work_struct back to this structure
 *
 * The notifier_block members can be mapped back to this structure with
 * ice_nb_to_br_offloads().
 */
struct ice_esw_br_offloads {
	struct ice_pf *pf;
	struct ice_esw_br *bridge;
	struct notifier_block netdev_nb;
	struct notifier_block switchdev_blk;
	struct notifier_block switchdev_nb;

	struct workqueue_struct *wq;
	struct delayed_work update_work;
};

/**
 * struct ice_esw_br_fdb_work - work item carrying one switchdev FDB event
 * @work: work_struct; ice_work_to_fdb_work() maps it back to this structure
 * @fdb_info: FDB notifier info of the event
 * @dev: net device the event refers to
 * @event: event code
 *	   (presumably SWITCHDEV_FDB_{ADD|DEL}_TO_DEVICE - TODO confirm)
 */
struct ice_esw_br_fdb_work {
	struct work_struct work;
	struct switchdev_notifier_fdb_info fdb_info;
	struct net_device *dev;
	unsigned long event;
};

/**
 * struct ice_esw_br_vlan - VLAN state
 * @vid: VLAN ID
 * @flags: VLAN flags
 *	   (NOTE(review): flag namespace not visible here, possibly
 *	   BRIDGE_VLAN_INFO_* - confirm)
 *
 * NOTE(review): presumably stored per port in ice_esw_br_port::vlans -
 * confirm against ice_eswitch_br.c.
 */
struct ice_esw_br_vlan {
	u16 vid;
	u16 flags;
};

/* Resolve the struct ice_esw_br_offloads that embeds @nb as its member
 * @nb_name.
 */
#define ice_nb_to_br_offloads(nb, nb_name) \
	container_of(nb, struct ice_esw_br_offloads, nb_name)

/* Resolve the struct ice_esw_br_offloads whose update_work.work member is
 * @w.
 */
#define ice_work_to_br_offloads(w) \
	container_of(w, struct ice_esw_br_offloads, update_work.work)

/* Resolve the struct ice_esw_br_fdb_work whose work member is @w. */
#define ice_work_to_fdb_work(w) \
	container_of(w, struct ice_esw_br_fdb_work, work)

/**
 * ice_eswitch_br_is_vid_valid - check whether a VID is treated as a VLAN
 * @vid: VLAN ID to check
 *
 * In trunk VLAN mode the bridge asks to offload VLAN 1 with the pvid and
 * untagged flags set for untagged traffic. Those flags are not supported,
 * so such requests get a MAC filter instead of a VLAN one.
 *
 * Return: true if @vid is greater than 1, false otherwise.
 */
static inline bool ice_eswitch_br_is_vid_valid(u16 vid)
{
	if (vid <= 1)
		return false;

	return true;
}

/* Tear down the bridge offload state of @pf. Called from the switchdev
 * disable path.
 */
void
ice_eswitch_br_offloads_deinit(struct ice_pf *pf);
/* Set up the bridge offload state of @pf. Called from the switchdev enable
 * path, which treats a non-zero return as failure.
 */
int
ice_eswitch_br_offloads_init(struct ice_pf *pf);

#endif /* _ICE_ESWITCH_BR_H_ */
Loading