Commit 25c800b2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bridge-neigh-suppression'

Ido Schimmel says:

====================
bridge: Add per-{Port, VLAN} neighbor suppression

Background
==========

In order to minimize the flooding of ARP and ND messages in the VXLAN
network, EVPN includes provisions [1] that allow participating VTEPs to
suppress such messages in case they know the MAC-IP binding and can
reply on behalf of the remote host. In Linux, the above is implemented
in the bridge driver using a per-port option called "neigh_suppress"
that was added in kernel version 4.15 [2].

Motivation
==========

Some applications use ARP messages as keepalives between the application
nodes in the network. This works perfectly well when two nodes are
connected to the same VTEP. When a node goes down it will stop
responding to ARP requests and the other node will notice it
immediately.

However, when the two nodes are connected to different VTEPs and
neighbor suppression is enabled, the local VTEP will reply to ARP
requests even after the remote node went down, until certain timers
expire and the EVPN control plane decides to withdraw the MAC/IP
Advertisement route for the address. Therefore, some users would like to
be able to disable neighbor suppression on VLANs where such applications
reside and keep it enabled on the rest.

Implementation
==============

The proposed solution is to allow user space to control neighbor
suppression on a per-{Port, VLAN} basis, in a similar fashion to other
per-port options that gained per-{Port, VLAN} counterparts such as
"mcast_router". This allows users to benefit from the operational
simplicity and scalability associated with shared VXLAN devices (i.e.,
external / collect-metadata mode), while still allowing for per-VLAN/VNI
neighbor suppression control.

The user interface is extended with a new "neigh_vlan_suppress" bridge
port option that allows user space to enable per-{Port, VLAN} neighbor
suppression on the bridge port. When enabled, the existing
"neigh_suppress" option has no effect and neighbor suppression is
controlled using a new "neigh_suppress" VLAN option. Example usage:

 # bridge link set dev vxlan0 neigh_vlan_suppress on
 # bridge vlan add vid 10 dev vxlan0
 # bridge vlan set vid 10 dev vxlan0 neigh_suppress on

Testing
=======

Tested using existing bridge selftests. Added a dedicated selftest in
the last patch.

Patchset overview
=================

Patches #1-#5 are preparations.

Patch #6 adds per-{Port, VLAN} neighbor suppression support to the
bridge's data path.

Patches #7-#8 add the required netlink attributes to enable the feature.

Patch #9 adds a selftest.

iproute2 patches can be found here [3].

Changelog
=========

Since RFC [4]:

No changes.

[1] https://www.rfc-editor.org/rfc/rfc7432#section-10
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a42317785c898c0ed46db45a33b0cc71b671bf29
[3] https://github.com/idosch/iproute2/tree/submit/neigh_suppress_v1
[4] https://lore.kernel.org/netdev/20230413095830.2182382-1-idosch@nvidia.com/


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1cf3fe1c 7648ac72
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@ struct br_ip_list {
#define BR_TX_FWD_OFFLOAD	BIT(20)
#define BR_PORT_LOCKED		BIT(21)
#define BR_PORT_MAB		BIT(22)
#define BR_NEIGH_VLAN_SUPPRESS	BIT(23)

#define BR_DEFAULT_AGEING_TIME	(300 * HZ)

+1 −0
Original line number Diff line number Diff line
@@ -525,6 +525,7 @@ enum {
	BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
	BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
	BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
	BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS,
	__BRIDGE_VLANDB_ENTRY_MAX,
};
#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
+1 −0
Original line number Diff line number Diff line
@@ -569,6 +569,7 @@ enum {
	IFLA_BRPORT_MAB,
	IFLA_BRPORT_MCAST_N_GROUPS,
	IFLA_BRPORT_MCAST_MAX_GROUPS,
	IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
	__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+27 −6
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
	bool neigh_suppress = false;

	list_for_each_entry(p, &br->port_list, list) {
		if (p->flags & BR_NEIGH_SUPPRESS) {
		if (p->flags & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS)) {
			neigh_suppress = true;
			break;
		}
@@ -158,7 +158,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
		return;

	if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
		if (p && (p->flags & BR_NEIGH_SUPPRESS))
		if (br_is_neigh_suppress_enabled(p, vid))
			return;
		if (parp->ar_op != htons(ARPOP_RREQUEST) &&
		    parp->ar_op != htons(ARPOP_RREPLY) &&
@@ -202,8 +202,8 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
			bool replied = false;

			if ((p && (p->flags & BR_PROXYARP)) ||
			    (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
							 BR_NEIGH_SUPPRESS)))) {
			    (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) ||
			    br_is_neigh_suppress_enabled(f->dst, vid)) {
				if (!vid)
					br_arp_send(br, p, skb->dev, sip, tip,
						    sha, n->ha, sha, 0, 0);
@@ -407,7 +407,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,

	BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;

	if (p && (p->flags & BR_NEIGH_SUPPRESS))
	if (br_is_neigh_suppress_enabled(p, vid))
		return;

	if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
@@ -461,7 +461,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
		if (f) {
			bool replied = false;

			if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
			if (br_is_neigh_suppress_enabled(f->dst, vid)) {
				if (vid != 0)
					br_nd_send(br, p, skb, n,
						   skb->vlan_proto,
@@ -483,3 +483,24 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
	}
}
#endif

bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid)
{
	if (!p)
		return false;

	if (!vid)
		return !!(p->flags & BR_NEIGH_SUPPRESS);

	if (p->flags & BR_NEIGH_VLAN_SUPPRESS) {
		struct net_bridge_vlan_group *vg = nbp_vlan_group_rcu(p);
		struct net_bridge_vlan *v;

		v = br_vlan_find(vg, vid);
		if (!v)
			return false;
		return !!(v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED);
	} else {
		return !!(p->flags & BR_NEIGH_SUPPRESS);
	}
}
+4 −4
Original line number Diff line number Diff line
@@ -80,10 +80,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)

	dest = eth_hdr(skb)->h_dest;
	if (is_broadcast_ether_addr(dest)) {
		br_flood(br, skb, BR_PKT_BROADCAST, false, true);
		br_flood(br, skb, BR_PKT_BROADCAST, false, true, vid);
	} else if (is_multicast_ether_addr(dest)) {
		if (unlikely(netpoll_tx_running(dev))) {
			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
			br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
			goto out;
		}
		if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
@@ -96,11 +96,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
		    br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
			br_multicast_flood(mdst, skb, brmctx, false, true);
		else
			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
			br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
	} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
		br_forward(dst->dst, skb, false, true);
	} else {
		br_flood(br, skb, BR_PKT_UNICAST, false, true);
		br_flood(br, skb, BR_PKT_UNICAST, false, true, vid);
	}
out:
	rcu_read_unlock();
Loading