Commit 6c446b63 authored by Jakub Kicinski
Browse files

Merge branch 'mlxsw-add-support-for-blackhole-nexthops'

Ido Schimmel says:

====================
mlxsw: Add support for blackhole nexthops

This patch set adds support for blackhole nexthops in mlxsw. These
nexthops are exactly the same as other nexthops, but instead of
forwarding packets to an egress router interface (RIF), they are
programmed to silently drop them.

Patches #1-#4 are preparations.

Patch #5 adds support for blackhole nexthops and removes the check that
prevented them from being programmed.

Patch #6 adds a selftest over mlxsw which tests that blackhole nexthops
can be programmed and are marked as offloaded.

Patch #7 extends the existing nexthop forwarding test to also test
blackhole functionality.

Patches #8-#10 add support for a new packet trap ('blackhole_nexthop')
which should be triggered whenever packets are dropped by a blackhole
nexthop. Obviously, by default, the trap action is set to 'drop' so that
dropped packets will not be reported.
====================

Link: https://lore.kernel.org/r/20201123071230.676469-1-idosch@idosch.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 23c01ed3 84e8feea
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -476,6 +476,10 @@ be added to the following table:
   * - ``esp_parsing``
     - ``drop``
     - Traps packets dropped due to an error in the ESP header parsing
   * - ``blackhole_nexthop``
     - ``drop``
     - Traps packets that the device decided to drop in case they hit a
       blackhole nexthop

Driver-specific Packet Traps
============================
+6 −3
Original line number Diff line number Diff line
@@ -913,7 +913,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)

	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
		if (mlxsw_sp_nexthop_offload(nh) &&
		    !mlxsw_sp_nexthop_group_has_ipip(nh))
		    !mlxsw_sp_nexthop_group_has_ipip(nh) &&
		    !mlxsw_sp_nexthop_is_discard(nh))
			size++;
	return size;
}
@@ -1105,7 +1106,8 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
	nh_count = 0;
	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
		if (!mlxsw_sp_nexthop_offload(nh) ||
		    mlxsw_sp_nexthop_group_has_ipip(nh))
		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
		    mlxsw_sp_nexthop_is_discard(nh))
			continue;

		if (nh_count < nh_skip)
@@ -1186,7 +1188,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)

	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
		if (!mlxsw_sp_nexthop_offload(nh) ||
		    mlxsw_sp_nexthop_group_has_ipip(nh))
		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
		    mlxsw_sp_nexthop_is_discard(nh))
			continue;

		mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
+79 −13
Original line number Diff line number Diff line
@@ -2858,9 +2858,10 @@ struct mlxsw_sp_nexthop {
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
	   update:1, /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	   discard:1; /* nexthop is programmed to discard packets */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -3011,6 +3012,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
	return false;
}

/* Report whether @nh is flagged as a nexthop that is programmed to
 * discard packets (i.e. its 'discard' bit is set).
 */
bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
{
	return !!nh->discard;
}

struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_nexthop_group_type type;
	union {
@@ -3284,7 +3290,11 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
			    adj_index, nh->rif->rif_index);
	if (nh->discard)
		mlxsw_reg_ratr_trap_action_set(ratr_pl,
					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
	else
		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	if (nh->counter_valid)
		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
@@ -4128,9 +4138,7 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
{
	int err = -EINVAL;

	if (nh->is_reject)
		NL_SET_ERR_MSG_MOD(extack, "Blackhole nexthops are not supported");
	else if (nh->is_fdb)
	if (nh->is_fdb)
		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
	else if (nh->has_encap)
		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
@@ -4165,7 +4173,7 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
		/* Device only nexthops with an IPIP device are programmed as
		 * encapsulating adjacency entries.
		 */
		if (!nh->gw_family &&
		if (!nh->gw_family && !nh->is_reject &&
		    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
			NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
			return -EINVAL;
@@ -4199,10 +4207,31 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
		return true;

	dev = info->nh->dev;
	return info->nh->gw_family ||
	return info->nh->gw_family || info->nh->is_reject ||
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

/* Set up @nh as a blackhole nexthop: bind it to the router's loopback
 * RIF, request that it be offloaded and flag it as discarding.
 */
static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_nexthop *nh)
{
	u16 rif_index = mlxsw_sp->router->lb_rif_index;

	/* A discarding nexthop never forwards through an egress RIF, yet
	 * it must still be programmed with a valid one. Borrow the
	 * loopback RIF that was created during init for that purpose.
	 */
	nh->rif = mlxsw_sp->router->rifs[rif_index];
	nh->should_offload = 1;
	nh->discard = 1;
}

/* Tear down the blackhole-specific state of @nh: withdraw the offload
 * request and detach the nexthop from its RIF. The 'discard' bit itself
 * is left untouched here.
 */
static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_nexthop *nh)
{
	nh->should_offload = 0;
	nh->rif = NULL;
}

static int
mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
			  struct mlxsw_sp_nexthop_group *nh_grp,
@@ -4236,6 +4265,9 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
	if (err)
		goto err_type_init;

	if (nh_obj->is_reject)
		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);

	return 0;

err_type_init:
@@ -4247,6 +4279,8 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_nexthop *nh)
{
	if (nh->discard)
		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
@@ -4994,7 +5028,7 @@ int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
	return err;
}

static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ratr_trap_action trap_action;
	char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -5008,11 +5042,13 @@ static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
	if (err)
		return err;

	trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
			    MLXSW_REG_RATR_TYPE_ETHERNET,
			    mlxsw_sp->router->adj_discard_index, rif_index);
			    mlxsw_sp->router->adj_discard_index,
			    mlxsw_sp->router->lb_rif_index);
	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
	if (err)
		goto err_ratr_write;
@@ -5050,8 +5086,7 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
		adjacency_index = nhgi->adj_index;
		ecmp_size = nhgi->ecmp_size;
	} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
		err = mlxsw_sp_adj_discard_write(mlxsw_sp,
						 nhgi->nh_rif->rif_index);
		err = mlxsw_sp_adj_discard_write(mlxsw_sp);
		if (err)
			return err;
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
@@ -8918,6 +8953,30 @@ static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router)
	kfree(router->ll_op_ctx);
}

/* Obtain the router's generic loopback RIF and remember its index in
 * mlxsw_sp->router->lb_rif_index. Returns 0 on success, otherwise the
 * error reported by mlxsw_sp_router_ul_rif_get(), in which case the
 * stored index is not modified.
 */
static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
{
	u16 rif_index;
	int err;

	/* The loopback RIF is bound to the main table (default VRF). Any
	 * table would do, but the main one exists regardless, so no extra
	 * resources are wasted.
	 */
	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, &rif_index);
	if (!err)
		mlxsw_sp->router->lb_rif_index = rif_index;

	return err;
}

/* Counterpart of mlxsw_sp_lb_rif_init(): hand the loopback RIF whose index
 * is stored in mlxsw_sp->router->lb_rif_index back via
 * mlxsw_sp_router_ul_rif_put().
 */
static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
}

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
			 struct netlink_ext_ack *extack)
{
@@ -8974,6 +9033,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_lb_rif_init(mlxsw_sp);
	if (err)
		goto err_lb_rif_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;
@@ -9039,6 +9102,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
err_mp_hash_init:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
@@ -9074,6 +9139,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
	mlxsw_core_flush_owq();
	WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue));
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
+2 −0
Original line number Diff line number Diff line
@@ -75,6 +75,7 @@ struct mlxsw_sp_router {
	/* One set of ops for each protocol: IPv4 and IPv6 */
	const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX];
	struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
	u16 lb_rif_index;
};

struct mlxsw_sp_fib_entry_priv {
@@ -200,6 +201,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
			     u32 *p_adj_size, u32 *p_adj_hash_index);
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
#define mlxsw_sp_nexthop_for_each(nh, router)				\
	for (nh = mlxsw_sp_nexthop_next(router, NULL); nh;		\
	     nh = mlxsw_sp_nexthop_next(router, nh))
+7 −1
Original line number Diff line number Diff line
@@ -617,7 +617,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
					       TRAP_TO_CPU),
			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_EXCEPTIONS,
					       TRAP_TO_CPU),
			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_EXCEPTIONS,
			MLXSW_SP_RXL_EXCEPTION(RTR_EGRESS0, L3_EXCEPTIONS,
					       TRAP_EXCEPTION_TO_CPU),
		},
	},
@@ -1007,6 +1007,12 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
					     false),
		},
	},
	{
		.trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_NEXTHOP, L3_DROPS),
		.listeners_arr = {
			MLXSW_SP_RXL_DISCARD(ROUTER3, L3_DISCARDS),
		},
	},
};

static struct mlxsw_sp_trap_policer_item *
Loading