Commit 389cb1ec authored by Jakub Kicinski
Browse files

Merge branch 'add-notifications-when-route-hardware-flags-change'

Ido Schimmel says:

====================
Add notifications when route hardware flags change

Routes installed to the kernel can be programmed to capable devices, in
which case they are marked with one of two flags. RTM_F_OFFLOAD for
routes that offload traffic from the kernel and RTM_F_TRAP for routes
that trap packets to the kernel for processing (e.g., host routes).

These flags are of interest to routing daemons since they would like to
delay advertisement of routes until they are installed in hardware. This
allows them to avoid packet loss or misrouted packets. Currently,
routing daemons do not receive any notifications when these flags are
changed, requiring them to poll the kernel tables for changes, which is
inefficient.

This series addresses the issue by having the kernel emit RTM_NEWROUTE
notifications whenever these flags change. The behavior is controlled by
two sysctls (net.ipv4.fib_notify_on_flag_change and
net.ipv6.fib_notify_on_flag_change) that default to 0 (no
notifications).

Note that even if route installation in hardware is improved to be more
synchronous, these notifications are still of interest. For example, a
multipath route can change from RTM_F_OFFLOAD to RTM_F_TRAP if its
neighbours become invalid. A routing daemon can choose to withdraw /
replace the route in that case. In addition, the deletion of a route
from the kernel can prompt the installation of an identical route
(already in kernel, with a higher metric) to hardware.

For testing purposes, netdevsim is aligned to simulate a "real" driver
that programs routes to hardware.

Series overview:

Patches #1-#2 align netdevsim to perform route programming in a
non-atomic context

Patches #3-#5 add sysctl to control IPv4 notifications

Patches #6-#8 add sysctl to control IPv6 notifications

Patch #9 extends existing fib tests to set sysctls before running tests

Patch #10 adds test for fib notifications over netdevsim
====================

Link: https://lore.kernel.org/r/20210201194757.3463461-1-idosch@idosch.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 64b268e1 19d36d29
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
@@ -178,6 +178,26 @@ min_adv_mss - INTEGER
	The advertised MSS depends on the first hop route MTU, but will
	never be lower than this setting.

fib_notify_on_flag_change - INTEGER
        Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
        RTM_F_TRAP flags are changed.

        After installing a route to the kernel, user space receives an
        acknowledgment, which means the route was installed in the kernel,
        but not necessarily in hardware.
        It is also possible for a route already installed in hardware to change
        its action and therefore its flags. For example, a host route that is
        trapping packets can be "promoted" to perform decapsulation following
        the installation of an IPinIP/VXLAN tunnel.
        The notifications will indicate to user space the state of the route.

        Default: 0 (Do not emit notifications.)

        Possible values:

        - 0 - Do not emit notifications.
        - 1 - Emit notifications.

IP Fragmentation:

ipfrag_high_thresh - LONG INTEGER
@@ -1775,6 +1795,26 @@ nexthop_compat_mode - BOOLEAN
	and extraneous notifications.
	Default: true (backward compat mode)

fib_notify_on_flag_change - INTEGER
        Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
        RTM_F_TRAP flags are changed.

        After installing a route to the kernel, user space receives an
        acknowledgment, which means the route was installed in the kernel,
        but not necessarily in hardware.
        It is also possible for a route already installed in hardware to change
        its action and therefore its flags. For example, a host route that is
        trapping packets can be "promoted" to perform decapsulation following
        the installation of an IPinIP/VXLAN tunnel.
        The notifications will indicate to user space the state of the route.

        Default: 0 (Do not emit notifications.)

        Possible values:

        - 0 - Do not emit notifications.
        - 1 - Emit notifications.

IPv6 Fragmentation:

ip6frag_high_thresh - INTEGER
+20 −3
Original line number Diff line number Diff line
@@ -4988,6 +4988,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
@@ -5004,10 +5005,18 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
				       !should_offload);
		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
				       should_offload, !should_offload);
}
#else
/* No-op variant compiled when IS_ENABLED(CONFIG_IPV6) is false.
 * Both arguments are ignored and nothing is modified.
 */
static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
{
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
@@ -5018,8 +5027,16 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
				       false, false);
}
#else
/* No-op variant compiled when IS_ENABLED(CONFIG_IPV6) is false.
 * Both arguments are ignored and nothing is modified.
 */
static void
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
}
#endif

static void
mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
+365 −169

File changed.

Preview size limit exceeded, changes collapsed.

+2 −7
Original line number Diff line number Diff line
@@ -336,13 +336,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
		call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}

/* Store the given hardware state on an IPv6 route entry.
 *
 * @f6i:     route entry whose flags are overwritten
 * @offload: new value for f6i->offload
 * @trap:    new value for f6i->trap
 *
 * Only the two fields are assigned; nothing else is done here.
 */
static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload,
					  bool trap)
{
	f6i->offload = offload;
	f6i->trap = trap;
}

enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,
@@ -545,6 +538,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
{
	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
			    bool offload, bool trap);

#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
+2 −0
Original line number Diff line number Diff line
@@ -188,6 +188,8 @@ struct netns_ipv4 {
	int sysctl_udp_wmem_min;
	int sysctl_udp_rmem_min;

	int sysctl_fib_notify_on_flag_change;

#ifdef CONFIG_NET_L3_MASTER_DEV
	int sysctl_udp_l3mdev_accept;
#endif
Loading