Commit bf837e8f authored by Jakub Kicinski
Browse files

Merge branch 'nexthop-refactor-and-fix-nexthop-selection-for-multipath-routes'

Benjamin Poirier says:

====================
nexthop: Refactor and fix nexthop selection for multipath routes

In order to select a nexthop for multipath routes, fib_select_multipath()
is used with legacy nexthops and nexthop_select_path_hthr() is used with
nexthop objects. Those two functions perform a validity test on the
neighbor related to each nexthop but their logic is structured differently.
This causes a divergence in behavior and nexthop_select_path_hthr() may
return a nexthop that failed the neighbor validity test even if there was
one that passed.

Refactor nexthop_select_path_hthr() to make it more similar to
fib_select_multipath() and fix the problem mentioned above.

v1: https://lore.kernel.org/netdev/20230529201914.69828-1-bpoirier@nvidia.com/
====================

Link: https://lore.kernel.org/r/20230719-nh_select-v2-0-04383e89f868@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 022add1d c7e95bbd
Loading
Loading
Loading
Loading
+41 −18
Original line number Diff line number Diff line
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
	return !!(state & NUD_VALID);
}

static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
static bool nexthop_is_good_nh(const struct nexthop *nh)
{
	struct nh_info *nhi = rcu_dereference(nh->nh_info);

	switch (nhi->family) {
	case AF_INET:
		return ipv4_good_nh(&nhi->fib_nh);
	case AF_INET6:
		return ipv6_good_nh(&nhi->fib6_nh);
	}

	return false;
}

static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
	struct nexthop *rc = NULL;
	int i;

	for (i = 0; i < nhg->num_nh; ++i) {
	for (i = 0; i < nhg->num_nh; i++) {
		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
		struct nh_info *nhi;

		if (hash > atomic_read(&nhge->hthr.upper_bound))
			continue;

		nhi = rcu_dereference(nhge->nh->nh_info);
		if (nhi->fdb_nh)
		return nhge->nh;
	}

	WARN_ON_ONCE(1);
	return NULL;
}

static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
{
	struct nexthop *rc = NULL;
	int i;

	if (nhg->fdb_nh)
		return nexthop_select_path_fdb(nhg, hash);

	for (i = 0; i < nhg->num_nh; ++i) {
		struct nh_grp_entry *nhge = &nhg->nh_entries[i];

		/* nexthops always check if it is good and does
		 * not rely on a sysctl for this behavior
		 */
		switch (nhi->family) {
		case AF_INET:
			if (ipv4_good_nh(&nhi->fib_nh))
				return nhge->nh;
			break;
		case AF_INET6:
			if (ipv6_good_nh(&nhi->fib6_nh))
				return nhge->nh;
			break;
		}
		if (!nexthop_is_good_nh(nhge->nh))
			continue;

		if (!rc)
			rc = nhge->nh;

		if (hash > atomic_read(&nhge->hthr.upper_bound))
			continue;

		return nhge->nh;
	}

	return rc;
	return rc ? : nhg->nh_entries[0].nh;
}

static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
+129 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ IPV4_TESTS="
	ipv4_large_res_grp
	ipv4_compat_mode
	ipv4_fdb_grp_fcnal
	ipv4_mpath_select
	ipv4_torture
	ipv4_res_torture
"
@@ -42,6 +43,7 @@ IPV6_TESTS="
	ipv6_large_res_grp
	ipv6_compat_mode
	ipv6_fdb_grp_fcnal
	ipv6_mpath_select
	ipv6_torture
	ipv6_res_torture
"
@@ -370,6 +372,27 @@ check_large_res_grp()
	log_test $? 0 "Dump large (x$buckets) nexthop buckets"
}

# Print the output device reported by "ip -j route get" for destination $1.
#
# Nothing is printed when the pipeline exits non-zero (for example when
# jq -e finds no usable ".[0].dev" value). The function itself still
# returns success in that case.
get_route_dev()
{
	local pfx="$1"
	local out

	out=$($IP -j route get "$pfx" | jq -re ".[0].dev") || return 0
	echo "$out"
}

# Compare the device selected for destination $1 against the expected
# device name $2.
#
# The comparison and the return status are delegated to check_output.
check_route_dev()
{
	local pfx="$1"
	local expected="$2"

	check_output "$(get_route_dev "$pfx")" "$expected"
}

start_ip_monitor()
{
	local mtype=$1
@@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal()
	$IP link del dev vx10
}

# Exercise nexthop selection for an IPv4 multipath route that uses
# nexthop group 1001 (nexthop 1 via veth1, nexthop 2 via veth3).
#
# Subtest 1: for each device, look for a destination in 172.16.101.1-254
# that currently resolves through it, add a "failed" neighbor entry for
# that device's gateway, and expect the same destination to resolve
# through the other device.
# Subtest 2: with "incomplete" neighbor entries for both gateways, run a
# route lookup; surviving it is the pass criterion.
#
# Returns $ksft_skip when jq is not available or when no destination
# resolving through one of the devices is found.
ipv4_mpath_select()
{
	local rc dev match h addr
	local -A gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
	local -A other_dev=([veth1]=veth3 [veth3]=veth1)

	echo
	echo "IPv4 multipath selection"
	echo "------------------------"
	if ! [ -x "$(command -v jq)" ]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"

	rc=0
	for dev in veth1 veth3; do
		# Walk the host addresses until one resolves through $dev;
		# $addr keeps that destination for the check below.
		match=0
		for ((h = 1; h <= 254; h++)); do
			addr="172.16.101.$h"
			[ "$(get_route_dev "$addr")" = "$dev" ] || continue
			match=1
			break
		done
		if [ "$match" -eq 0 ]; then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		# With the gateway neighbor marked failed, the lookup is
		# expected to move to the other device.
		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
			rc=1
			break
		fi
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
	run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
	run_cmd "$IP route get 172.16.101.1"
	# if we did not crash, success
	# NOTE(review): $rc is not assigned in this function after the first
	# subtest; presumably run_cmd updates it -- confirm.
	log_test $rc 0 "Multipath selection with no valid neighbor"
}

# Exercise nexthop selection for an IPv6 multipath route that uses
# nexthop group 1001 (nexthop 1 via veth1, nexthop 2 via veth3).
#
# Subtest 1: for each device, look for a destination in
# 2001:db8:101::1 - 2001:db8:101::ffff that currently resolves through
# it, add a "failed" neighbor entry for that device's gateway, and expect
# the same destination to resolve through the other device.
# Subtest 2: with "incomplete" neighbor entries for both gateways, run a
# route lookup; surviving it is the pass criterion.
#
# Returns $ksft_skip when jq is not available or when no destination
# resolving through one of the devices is found.
ipv6_mpath_select()
{
	local rc dev match h addr
	local -A gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
	local -A other_dev=([veth1]=veth3 [veth3]=veth1)

	echo
	echo "IPv6 multipath selection"
	echo "------------------------"
	if ! [ -x "$(command -v jq)" ]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"

	rc=0
	for dev in veth1 veth3; do
		# Walk the interface identifiers until one resolves through
		# $dev; $addr keeps that destination for the check below.
		match=0
		for ((h = 1; h <= 65535; h++)); do
			addr=$(printf "2001:db8:101::%x" $h)
			[ "$(get_route_dev "$addr")" = "$dev" ] || continue
			match=1
			break
		done
		if [ "$match" -eq 0 ]; then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		# With the gateway neighbor marked failed, the lookup is
		# expected to move to the other device.
		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
			rc=1
			break
		fi
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
	run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
	run_cmd "$IP route get 2001:db8:101::1"
	# if we did not crash, success
	# NOTE(review): $rc is not assigned in this function after the first
	# subtest; presumably run_cmd updates it -- confirm.
	log_test $rc 0 "Multipath selection with no valid neighbor"
}

################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups
#