Commit 5ea3c72c authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'route-offload-failure'



net: Add support for route offload failure notifications

Ido Schimmel  says:

====================
This is a complementary series to the one merged in commit 389cb1ec
("Merge branch 'add-notifications-when-route-hardware-flags-change'").

The previous series added RTM_NEWROUTE notifications to user space
whenever a route was successfully installed in hardware or when its
state in hardware changed. This allows routing daemons to delay
advertisement of routes until they are installed in hardware.

However, if route installation failed, a routing daemon will wait
indefinitely for a notification that will never come. The aim of this
series is to provide a failure notification via a new flag
(RTM_F_OFFLOAD_FAILED) in the RTM_NEWROUTE message. Upon such a
notification a routing daemon may decide to withdraw the route from the
FIB.

Series overview:

Patch #1 adds the new RTM_F_OFFLOAD_FAILED flag

Patches #2-#3 and #4-#5 add failure notifications to IPv4 and IPv6,
respectively

Patches #6-#8 teach netdevsim to fail route installation via a new knob
in debugfs

Patch #9 extends mlxsw to mark routes with the new flag

Patch #10 adds test cases for the new notification over netdevsim
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 08cbabb7 9ee53e37
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -180,7 +180,7 @@ min_adv_mss - INTEGER

fib_notify_on_flag_change - INTEGER
        Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
        RTM_F_TRAP flags are changed.
        RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed.

        After installing a route to the kernel, user space receives an
        acknowledgment, which means the route was installed in the kernel,
@@ -197,6 +197,7 @@ fib_notify_on_flag_change - INTEGER

        - 0 - Do not emit notifications.
        - 1 - Emit notifications.
        - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.

IP Fragmentation:

@@ -1797,7 +1798,7 @@ nexthop_compat_mode - BOOLEAN

fib_notify_on_flag_change - INTEGER
        Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
        RTM_F_TRAP flags are changed.
        RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed.

        After installing a route to the kernel, user space receives an
        acknowledgment, which means the route was installed in the kernel,
@@ -1814,6 +1815,7 @@ fib_notify_on_flag_change - INTEGER

        - 0 - Do not emit notifications.
        - 1 - Emit notifications.
        - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.

IPv6 Fragmentation:

+56 −2
Original line number Diff line number Diff line
@@ -4942,6 +4942,25 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
	return NULL;
}

static void
mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
				      struct fib_entry_notifier_info *fen_info)
{
	u32 *p_dst = (u32 *) &fen_info->dst;
	struct fib_rt_info fri;

	fri.fi = fen_info->fi;
	fri.tb_id = fen_info->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = fen_info->dst_len;
	fri.tos = fen_info->tos;
	fri.type = fen_info->type;
	fri.offload = false;
	fri.trap = false;
	fri.offload_failed = true;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

static void
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
@@ -4963,6 +4982,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
	fri.type = fib4_entry->type;
	fri.offload = should_offload;
	fri.trap = !should_offload;
	fri.offload_failed = false;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

@@ -4985,9 +5005,34 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
	fri.type = fib4_entry->type;
	fri.offload = false;
	fri.trap = false;
	fri.offload_failed = false;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
				      struct fib6_info **rt_arr,
				      unsigned int nrt6)
{
	int i;

	/* In IPv6 a multipath route is represented using multiple routes, so
	 * we need to set the flags on all of them.
	 */
	for (i = 0; i < nrt6; i++)
		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
				       false, false, true);
}
#else
static void
mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
				      struct fib6_info **rt_arr,
				      unsigned int nrt6)
{
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
@@ -5006,7 +5051,7 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
				       should_offload, !should_offload);
				       should_offload, !should_offload, false);
}
#else
static void
@@ -5028,7 +5073,7 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
				       false, false);
				       false, false, false);
}
#else
static void
@@ -7021,6 +7066,8 @@ static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
		if (err) {
			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
			mlxsw_sp_router_fib_abort(mlxsw_sp);
			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
							      &fib_event->fen_info);
		}
		fib_info_put(fib_event->fen_info.fi);
		break;
@@ -7042,6 +7089,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
					       struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
					       struct mlxsw_sp_fib_event *fib_event)
{
	struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event;
	int err;

	mlxsw_sp_span_respin(mlxsw_sp);
@@ -7053,6 +7101,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
		if (err) {
			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
			mlxsw_sp_router_fib_abort(mlxsw_sp);
			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
							      fib6_event->rt_arr,
							      fib6_event->nrt6);
		}
		mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
		break;
@@ -7062,6 +7113,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
		if (err) {
			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
			mlxsw_sp_router_fib_abort(mlxsw_sp);
			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
							      fib6_event->rt_arr,
							      fib6_event->nrt6);
		}
		mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event);
		break;
+21 −19
Original line number Diff line number Diff line
@@ -1012,23 +1012,25 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
	nsim_dev->fw_update_status = true;
	nsim_dev->fw_update_overwrite_mask = 0;

	nsim_dev->fib_data = nsim_fib_create(devlink, extack);
	if (IS_ERR(nsim_dev->fib_data))
		return PTR_ERR(nsim_dev->fib_data);

	nsim_devlink_param_load_driverinit_values(devlink);

	err = nsim_dev_dummy_region_init(nsim_dev, devlink);
	if (err)
		goto err_fib_destroy;
		return err;

	err = nsim_dev_traps_init(devlink);
	if (err)
		goto err_dummy_region_exit;

	nsim_dev->fib_data = nsim_fib_create(devlink, extack);
	if (IS_ERR(nsim_dev->fib_data)) {
		err = PTR_ERR(nsim_dev->fib_data);
		goto err_traps_exit;
	}

	err = nsim_dev_health_init(nsim_dev, devlink);
	if (err)
		goto err_traps_exit;
		goto err_fib_destroy;

	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
	if (err)
@@ -1043,12 +1045,12 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,

err_health_exit:
	nsim_dev_health_exit(nsim_dev);
err_fib_destroy:
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_traps_exit:
	nsim_dev_traps_exit(devlink);
err_dummy_region_exit:
	nsim_dev_dummy_region_exit(nsim_dev);
err_fib_destroy:
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
	return err;
}

@@ -1080,15 +1082,9 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
	if (err)
		goto err_devlink_free;

	nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
	if (IS_ERR(nsim_dev->fib_data)) {
		err = PTR_ERR(nsim_dev->fib_data);
		goto err_resources_unregister;
	}

	err = devlink_register(devlink, &nsim_bus_dev->dev);
	if (err)
		goto err_fib_destroy;
		goto err_resources_unregister;

	err = devlink_params_register(devlink, nsim_devlink_params,
				      ARRAY_SIZE(nsim_devlink_params));
@@ -1108,9 +1104,15 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
	if (err)
		goto err_traps_exit;

	nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
	if (IS_ERR(nsim_dev->fib_data)) {
		err = PTR_ERR(nsim_dev->fib_data);
		goto err_debugfs_exit;
	}

	err = nsim_dev_health_init(nsim_dev, devlink);
	if (err)
		goto err_debugfs_exit;
		goto err_fib_destroy;

	err = nsim_bpf_dev_init(nsim_dev);
	if (err)
@@ -1128,6 +1130,8 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
	nsim_bpf_dev_exit(nsim_dev);
err_health_exit:
	nsim_dev_health_exit(nsim_dev);
err_fib_destroy:
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_debugfs_exit:
	nsim_dev_debugfs_exit(nsim_dev);
err_traps_exit:
@@ -1139,8 +1143,6 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
				  ARRAY_SIZE(nsim_devlink_params));
err_dl_unregister:
	devlink_unregister(devlink);
err_fib_destroy:
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
err_resources_unregister:
	devlink_resources_unregister(devlink, NULL);
err_devlink_free:
@@ -1157,10 +1159,10 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
	debugfs_remove(nsim_dev->take_snapshot);
	nsim_dev_port_del_all(nsim_dev);
	nsim_dev_health_exit(nsim_dev);
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
	nsim_dev_traps_exit(devlink);
	nsim_dev_dummy_region_exit(nsim_dev);
	mutex_destroy(&nsim_dev->port_list_lock);
	nsim_fib_destroy(devlink, nsim_dev->fib_data);
}

void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
+117 −6
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <net/fib_rules.h>
#include <net/net_namespace.h>
#include <net/nexthop.h>
#include <linux/debugfs.h>

#include "netdevsim.h"

@@ -53,6 +54,8 @@ struct nsim_fib_data {
	struct work_struct fib_event_work;
	struct list_head fib_event_queue;
	spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
	struct dentry *ddir;
	bool fail_route_offload;
};

struct nsim_fib_rt_key {
@@ -303,6 +306,25 @@ nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht,
	return container_of(fib_rt, struct nsim_fib4_rt, common);
}

static void
nsim_fib4_rt_offload_failed_flag_set(struct net *net,
				     struct fib_entry_notifier_info *fen_info)
{
	u32 *p_dst = (u32 *)&fen_info->dst;
	struct fib_rt_info fri;

	fri.fi = fen_info->fi;
	fri.tb_id = fen_info->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = fen_info->dst_len;
	fri.tos = fen_info->tos;
	fri.type = fen_info->type;
	fri.offload = false;
	fri.trap = false;
	fri.offload_failed = true;
	fib_alias_hw_flags_set(net, &fri);
}

static void nsim_fib4_rt_hw_flags_set(struct net *net,
				      const struct nsim_fib4_rt *fib4_rt,
				      bool trap)
@@ -319,6 +341,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net,
	fri.type = fib4_rt->type;
	fri.offload = false;
	fri.trap = trap;
	fri.offload_failed = false;
	fib_alias_hw_flags_set(net, &fri);
}

@@ -383,6 +406,15 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
	struct nsim_fib4_rt *fib4_rt, *fib4_rt_old;
	int err;

	if (data->fail_route_offload) {
		/* For testing purposes, user set debugfs fail_route_offload
		 * value to true. Simulate hardware programming latency and then
		 * fail.
		 */
		msleep(1);
		return -EINVAL;
	}

	fib4_rt = nsim_fib4_rt_create(data, fen_info);
	if (!fib4_rt)
		return -ENOMEM;
@@ -405,7 +437,7 @@ static void nsim_fib4_rt_remove(struct nsim_fib_data *data,
	struct nsim_fib4_rt *fib4_rt;

	fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
	if (WARN_ON_ONCE(!fib4_rt))
	if (!fib4_rt)
		return;

	rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node,
@@ -422,6 +454,11 @@ static int nsim_fib4_event(struct nsim_fib_data *data,
	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
		err = nsim_fib4_rt_insert(data, fen_info);
		if (err) {
			struct net *net = devlink_net(data->devlink);

			nsim_fib4_rt_offload_failed_flag_set(net, fen_info);
		}
		break;
	case FIB_EVENT_ENTRY_DEL:
		nsim_fib4_rt_remove(data, fen_info);
@@ -481,7 +518,7 @@ static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt,
	struct nsim_fib6_rt_nh *fib6_rt_nh;

	fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt);
	if (WARN_ON_ONCE(!fib6_rt_nh))
	if (!fib6_rt_nh)
		return;

	fib6_rt->nhs--;
@@ -563,8 +600,17 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data,
	struct nsim_fib6_rt *fib6_rt;
	int i, err;

	if (data->fail_route_offload) {
		/* For testing purposes, user set debugfs fail_route_offload
		 * value to true. Simulate hardware programming latency and then
		 * fail.
		 */
		msleep(1);
		return -EINVAL;
	}

	fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
	if (WARN_ON_ONCE(!fib6_rt))
	if (!fib6_rt)
		return -EINVAL;

	for (i = 0; i < fib6_event->nrt6; i++) {
@@ -585,6 +631,26 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data,
	return err;
}

#if IS_ENABLED(CONFIG_IPV6)
static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data,
						 struct fib6_info **rt_arr,
						 unsigned int nrt6)

{
	struct net *net = devlink_net(data->devlink);
	int i;

	for (i = 0; i < nrt6; i++)
		fib6_info_hw_flags_set(net, rt_arr[i], false, false, true);
}
#else
static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data,
						 struct fib6_info **rt_arr,
						 unsigned int nrt6)
{
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
				      const struct nsim_fib6_rt *fib6_rt,
@@ -594,7 +660,7 @@ static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
	struct nsim_fib6_rt_nh *fib6_rt_nh;

	list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list)
		fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap);
		fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap, false);
}
#else
static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
@@ -666,6 +732,15 @@ static int nsim_fib6_rt_insert(struct nsim_fib_data *data,
	struct nsim_fib6_rt *fib6_rt, *fib6_rt_old;
	int err;

	if (data->fail_route_offload) {
		/* For testing purposes, user set debugfs fail_route_offload
		 * value to true. Simulate hardware programming latency and then
		 * fail.
		 */
		msleep(1);
		return -EINVAL;
	}

	fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr,
				      fib6_event->nrt6);
	if (IS_ERR(fib6_rt))
@@ -763,7 +838,7 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
			   struct nsim_fib6_event *fib6_event,
			   unsigned long event)
{
	int err = 0;
	int err;

	if (fib6_event->rt_arr[0]->fib6_src.plen)
		return 0;
@@ -771,9 +846,13 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
		err = nsim_fib6_rt_insert(data, fib6_event);
		if (err)
			goto err_rt_offload_failed_flag_set;
		break;
	case FIB_EVENT_ENTRY_APPEND:
		err = nsim_fib6_rt_append(data, fib6_event);
		if (err)
			goto err_rt_offload_failed_flag_set;
		break;
	case FIB_EVENT_ENTRY_DEL:
		nsim_fib6_rt_remove(data, fib6_event);
@@ -782,6 +861,11 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
		break;
	}

	return 0;

err_rt_offload_failed_flag_set:
	nsim_fib6_rt_offload_failed_flag_set(data, fib6_event->rt_arr,
					     fib6_event->nrt6);
	return err;
}

@@ -1289,10 +1373,29 @@ static void nsim_fib_event_work(struct work_struct *work)
	mutex_unlock(&data->fib_lock);
}

static int
nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
{
	data->ddir = debugfs_create_dir("fib", nsim_dev->ddir);
	if (IS_ERR(data->ddir))
		return PTR_ERR(data->ddir);

	data->fail_route_offload = false;
	debugfs_create_bool("fail_route_offload", 0600, data->ddir,
			    &data->fail_route_offload);
	return 0;
}

static void nsim_fib_debugfs_exit(struct nsim_fib_data *data)
{
	debugfs_remove_recursive(data->ddir);
}

struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
				      struct netlink_ext_ack *extack)
{
	struct nsim_fib_data *data;
	struct nsim_dev *nsim_dev;
	int err;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1300,10 +1403,15 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
		return ERR_PTR(-ENOMEM);
	data->devlink = devlink;

	err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
	nsim_dev = devlink_priv(devlink);
	err = nsim_fib_debugfs_init(data, nsim_dev);
	if (err)
		goto err_data_free;

	err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
	if (err)
		goto err_debugfs_exit;

	mutex_init(&data->fib_lock);
	INIT_LIST_HEAD(&data->fib_rt_list);
	err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params);
@@ -1364,6 +1472,8 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
	rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
				    data);
	mutex_destroy(&data->fib_lock);
err_debugfs_exit:
	nsim_fib_debugfs_exit(data);
err_data_free:
	kfree(data);
	return ERR_PTR(err);
@@ -1391,5 +1501,6 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
	WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
	WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
	mutex_destroy(&data->fib_lock);
	nsim_fib_debugfs_exit(data);
	kfree(data);
}
+3 −2
Original line number Diff line number Diff line
@@ -195,7 +195,8 @@ struct fib6_info {
					fib6_destroying:1,
					offload:1,
					trap:1,
					unused:2;
					offload_failed:1,
					unused:1;

	struct rcu_head			rcu;
	struct nexthop			*nh;
@@ -539,7 +540,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
			    bool offload, bool trap);
			    bool offload, bool trap, bool offload_failed);

#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
Loading