Commit 97f7d3dd authored by Paolo Abeni
Browse files

Merge branch 'mptcp-add-mixed-v4-v6-support-for-the-in-kernel-pm'

Matthieu Baerts says:

====================
mptcp: add mixed v4/v6 support for the in-kernel PM

Before these patches, the in-kernel Path-Manager would not allow, for
the same MPTCP connection, having a mix of subflows in v4 and v6.

MPTCP's RFC 8684 doesn't forbid that and it is even recommended to do so
as the paths in v4 and v6 are likely different. Some networks are also
v4 or v6 only; we cannot assume they all have both v4 and v6 support.

Patch 1 then removes this artificial constraint in the in-kernel PM
currently enforcing there are no mixed subflows in place, either in
address announcement or in subflow creation areas.

Patch 2 makes sure the sk_ipv6only attribute is also propagated to
subflows, just in case a new PM wouldn't respect it.

Some selftests have also been added for the in-kernel PM (patch 3).

Patches 4 to 8 are just some cleanups and small improvements in the
printed messages in the userspace PM selftest. They are not linked to the
rest but were identified when working on a related patch modifying this
selftest, already in -net:

  commit 4656d72c ("selftests: mptcp: userspace: validate v4-v6 subflows mix")
---
====================

Link: https://lore.kernel.org/r/20230123-upstream-net-next-pm-v4-v6-v1-0-43fac502bfbf@tessares.net


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents d0941130 8dbdf24f
Loading
Loading
Loading
Loading
+31 −27
Original line number Diff line number Diff line
@@ -152,7 +152,6 @@ static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
		     const struct mptcp_sock *msk)
{
	const struct sock *sk = (const struct sock *)msk;
	struct mptcp_pm_addr_entry *entry, *ret = NULL;

	msk_owned_by_me(msk);
@@ -165,16 +164,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			if ((entry->addr.family == AF_INET &&
			     !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
			    (sk->sk_family == AF_INET &&
			     !ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
				continue;
		}

		ret = entry;
		break;
	}
@@ -423,7 +412,9 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
/* Fill all the remote addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
					      struct mptcp_addr_info *local,
					      bool fullmesh,
					      struct mptcp_addr_info *addrs)
{
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@@ -443,6 +434,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
		if (deny_id0)
			return 0;

		if (!mptcp_pm_addr_families_match(sk, local, &remote))
			return 0;

		msk->pm.subflows++;
		addrs[i++] = remote;
	} else {
@@ -453,6 +447,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
			if (deny_id0 && !addrs[i].id)
				continue;

			if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
				continue;

			if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
			    msk->pm.subflows < subflows_max) {
				msk->pm.subflows++;
@@ -603,9 +600,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
		fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

		msk->pm.local_addr_used++;
		nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
		if (nr)
		__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
		nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
		if (nr == 0)
			continue;

		spin_unlock_bh(&msk->pm.lock);
		for (i = 0; i < nr; i++)
			__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
@@ -628,11 +627,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
 * and return the array size.
 */
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
					     struct mptcp_addr_info *remote,
					     struct mptcp_addr_info *addrs)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info local;
	struct pm_nl_pernet *pernet;
	unsigned int subflows_max;
	int i = 0;
@@ -645,15 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
			continue;

		if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			if ((entry->addr.family == AF_INET &&
			     !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
			    (sk->sk_family == AF_INET &&
			     !ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
		if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
			continue;
		}

		if (msk->pm.subflows < subflows_max) {
			msk->pm.subflows++;
@@ -666,8 +658,18 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
	 * 'IPADDRANY' local address
	 */
	if (!i) {
		struct mptcp_addr_info local;

		memset(&local, 0, sizeof(local));
		local.family = msk->pm.remote.family;
		local.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			       remote->family == AF_INET6 &&
			       ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
			       remote->family;

		if (!mptcp_pm_addr_families_match(sk, &local, remote))
			return 0;

		msk->pm.subflows++;
		addrs[i++] = local;
@@ -706,7 +708,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
	/* connect to the specified remote address, using whatever
	 * local address the routing configuration will pick.
	 */
	nr = fill_local_addresses_vec(msk, addrs);
	nr = fill_local_addresses_vec(msk, &remote, addrs);
	if (nr == 0)
		return;

	msk->pm.add_addr_accepted++;
	if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+3 −2
Original line number Diff line number Diff line
@@ -59,8 +59,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
		 */
		e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
		if (!e) {
			spin_unlock_bh(&msk->pm.lock);
			return -ENOMEM;
			ret = -ENOMEM;
			goto append_err;
		}

		*e = *entry;
@@ -74,6 +74,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
		ret = entry->addr.id;
	}

append_err:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}
+1 −0
Original line number Diff line number Diff line
@@ -1255,6 +1255,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
	ssk->sk_priority = sk->sk_priority;
	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
	ssk->sk_ipv6only = sk->sk_ipv6only;
	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);

	if (sk->sk_userlocks & tx_rx_locks) {
+44 −9
Original line number Diff line number Diff line
@@ -774,24 +774,17 @@ do_transfer()
		addr_nr_ns2=${addr_nr_ns2:9}
	fi

	local local_addr
	if is_v6 "${connect_addr}"; then
		local_addr="::"
	else
		local_addr="0.0.0.0"
	fi

	extra_srv_args="$extra_args $extra_srv_args"
	if [ "$test_link_fail" -gt 1 ];then
		timeout ${timeout_test} \
			ip netns exec ${listener_ns} \
				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
					$extra_srv_args ${local_addr} < "$sinfail" > "$sout" &
					$extra_srv_args "::" < "$sinfail" > "$sout" &
	else
		timeout ${timeout_test} \
			ip netns exec ${listener_ns} \
				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
					$extra_srv_args ${local_addr} < "$sin" > "$sout" &
					$extra_srv_args "::" < "$sin" > "$sout" &
	fi
	local spid=$!

@@ -2448,6 +2441,47 @@ v4mapped_tests()
	fi
}

# Test cases where IPv4 and IPv6 addresses are both involved in the same
# connection. Each case is only run when reset accepts it.
mixed_tests()
{
	# ns1 signals an IPv6 address while the connection is made to the
	# IPv4 address 10.0.1.1; chk_join_nr is called with 0 0 0.
	if reset "IPv4 sockets do not use IPv6 addresses"; then
		pm_nl_set_limits $ns1 0 1
		pm_nl_set_limits $ns2 1 1
		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
		chk_join_nr 0 0 0
	fi

	# Need an IPv6 mptcp socket to allow subflows of both families:
	# here the connection is made to an IPv6 address and ns1 signals an
	# IPv4 one.
	if reset "simult IPv4 and IPv6 subflows"; then
		pm_nl_set_limits $ns1 0 1
		pm_nl_set_limits $ns2 1 1
		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
		run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
		chk_join_nr 1 1 1
	fi

	# cross families subflows will not be created even in fullmesh mode
	if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then
		pm_nl_set_limits $ns1 0 4
		pm_nl_set_limits $ns2 1 4
		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
		run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
		chk_join_nr 1 1 1
	fi

	# fullmesh still tries to create all the possible subflows with
	# matching family
	# NOTE(review): fullmesh is requested here through the "fullmesh_1"
	# argument of run_tests rather than an endpoint flag -- its handling
	# is not visible in this hunk, confirm in do_transfer.
	if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then
		pm_nl_set_limits $ns1 0 4
		pm_nl_set_limits $ns2 2 4
		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
		run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow
		chk_join_nr 4 4 4
	fi
}

backup_tests()
{
	# single subflow, backup
@@ -3120,6 +3154,7 @@ all_tests_sorted=(
	a@add_tests
	6@ipv6_tests
	4@v4mapped_tests
	M@mixed_tests
	b@backup_tests
	p@add_addr_ports_tests
	k@syncookies_tests
+92 −61
Original line number Diff line number Diff line
@@ -43,41 +43,40 @@ rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
ns2="ns2-$rndh"

# Print a section heading on stdout, unbuffered, as "INFO: <title>".
# $1: title text
print_title()
{
	local title="${1}"

	stdbuf -o0 -e0 printf "INFO: %s\n" "${title}"
}

# Stop a background process and reap it.
# $1: PID of the process; 0 means there is nothing to stop.
# SIGUSR1 is sent first, then the default kill signal; the output of both
# kill commands is discarded. The status is the one returned by wait.
kill_wait()
{
	local pid=$1

	if [ $pid -eq 0 ]; then
		return 0
	fi

	kill -SIGUSR1 $pid > /dev/null 2>&1
	kill $pid > /dev/null 2>&1
	wait $pid 2>/dev/null
}

cleanup()
{
	echo "cleanup"

	rm -rf $file $client_evts $server_evts
	print_title "Cleanup"

	# Terminate the MPTCP connection and related processes
	if [ $client4_pid -ne 0 ]; then
		kill -SIGUSR1 $client4_pid > /dev/null 2>&1
	fi
	if [ $server4_pid -ne 0 ]; then
		kill_wait $server4_pid
	fi
	if [ $client6_pid -ne 0 ]; then
		kill -SIGUSR1 $client6_pid > /dev/null 2>&1
	fi
	if [ $server6_pid -ne 0 ]; then
		kill_wait $server6_pid
	fi
	if [ $server_evts_pid -ne 0 ]; then
		kill_wait $server_evts_pid
	fi
	if [ $client_evts_pid -ne 0 ]; then
		kill_wait $client_evts_pid
	fi
	local pid
	for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
		   $server_evts_pid $client_evts_pid
	do
		kill_wait $pid
	done

	local netns
	for netns in "$ns1" "$ns2" ;do
		ip netns del "$netns"
	done

	rm -rf $file $client_evts $server_evts

	stdbuf -o0 -e0 printf "Done\n"
}

trap cleanup EXIT
@@ -108,6 +107,7 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up

print_title "Init"
stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2         \t\t\t[OK]\n"

make_file()
@@ -193,11 +193,16 @@ make_connection()
	server_serverside=$(grep "type:1," "$server_evts" |
			    sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')

	stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t" $is_v6
	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
		   [ "$server_serverside" = 1 ]
	then
		stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t[OK]\n" $is_v6
		stdbuf -o0 -e0 printf "[OK]\n"
	else
		stdbuf -o0 -e0 printf "[FAIL]\n"
		stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \
			"${client_token}" "${server_token}" \
			"${client_serverside}" "${server_serverside}"
		exit 1
	fi

@@ -217,6 +222,48 @@ make_connection()
	fi
}

# Compare one parsed value against its expected counterpart.
#
# $1: name of the variable holding the value that was found; the expected
#     value is read from the variable named "e_<$1>". Both are resolved
#     through indirect expansion, so they must be visible from here.
# $2: result of the previous checks ("0": nothing has failed so far)
#
# Returns 0 when both values are equal. Otherwise prints "[FAIL]" (only
# when no earlier check has already failed), then a line describing the
# mismatch, and returns 1.
check_expected_one()
{
	local var="${1}"
	local exp="e_${var}"
	local prev_ret="${2}"

	if [ "${!var}" = "${!exp}" ]
	then
		return 0
	fi

	if [ "${prev_ret}" = "0" ]
	then
		stdbuf -o0 -e0 printf "[FAIL]\n"
	fi

	# Argument order has to follow the format string: the expected value
	# (e_<name>) first, then the value that was actually found.
	stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
		"${var}" "${!exp}" "${!var}"
	return 1
}

# Check every listed variable against its expected counterpart.
#
# $@: names of the variables to check; each one is handed to
#     check_expected_one together with the result accumulated so far
#
# Prints "[OK]" and returns 0 when every check passes; otherwise exits the
# script with status 1.
check_expected()
{
	local var
	local ret=0

	for var in "${@}"
	do
		if ! check_expected_one "${var}" "${ret}"
		then
			ret=1
		fi
	done

	if [ ${ret} -ne 0 ]
	then
		exit 1
	fi

	stdbuf -o0 -e0 printf "[OK]\n"
	return 0
}

verify_announce_event()
{
	local evt=$1
@@ -242,19 +289,14 @@ verify_announce_event()
	fi
	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
		   [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
		   [ "$id" = "$e_id" ]
	then
		stdbuf -o0 -e0 printf "[OK]\n"
		return 0
	fi
	stdbuf -o0 -e0 printf "[FAIL]\n"
	exit 1

	check_expected "type" "token" "addr" "dport" "id"
}

test_announce()
{
	print_title "Announce tests"

	# Capture events on the network namespace running the server
	:>"$server_evts"

@@ -270,7 +312,7 @@ test_announce()
	then
		stdbuf -o0 -e0 printf "[OK]\n"
	else
		stdbuf -o0 -e0 printf "[FAIL]\n"
		stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}"
		exit 1
	fi

@@ -347,18 +389,14 @@ verify_remove_event()
	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
		   [ "$id" = "$e_id" ]
	then
		stdbuf -o0 -e0 printf "[OK]\n"
		return 0
	fi
	stdbuf -o0 -e0 printf "[FAIL]\n"
	exit 1

	check_expected "type" "token" "id"
}

test_remove()
{
	print_title "Remove tests"

	# Capture events on the network namespace running the server
	:>"$server_evts"

@@ -507,20 +545,13 @@ verify_subflow_events()
		daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
	fi

	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
		   [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] &&
		   [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] &&
		   [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ]
	then
		stdbuf -o0 -e0 printf "[OK]\n"
		return 0
	fi
	stdbuf -o0 -e0 printf "[FAIL]\n"
	exit 1
	check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
}

test_subflows()
{
	print_title "Subflows v4 or v6 only tests"

	# Capture events on the network namespace running the server
	:>"$server_evts"

@@ -754,6 +785,8 @@ test_subflows()

test_subflows_v4_v6_mix()
{
	print_title "Subflows v4 and v6 mix tests"

	# Attempt to add a listener at 10.0.2.1:<subflow-port>
	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
	   $app6_port > /dev/null 2>&1 &
@@ -800,6 +833,8 @@ test_subflows_v4_v6_mix()

test_prio()
{
	print_title "Prio tests"

	local count

	# Send MP_PRIO signal from client to server machine
@@ -811,7 +846,7 @@ test_prio()
	count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
	[ -z "$count" ] && count=0
	if [ $count != 1 ]; then
		stdbuf -o0 -e0 printf "[FAIL]\n"
		stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
		exit 1
	else
		stdbuf -o0 -e0 printf "[OK]\n"
@@ -822,7 +857,7 @@ test_prio()
	count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
	[ -z "$count" ] && count=0
	if [ $count != 1 ]; then
		stdbuf -o0 -e0 printf "[FAIL]\n"
		stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
		exit 1
	else
		stdbuf -o0 -e0 printf "[OK]\n"
@@ -863,19 +898,13 @@ verify_listener_events()
			sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
	fi

	if [ $type ] && [ $type = $e_type ] &&
	   [ $family ] && [ $family = $e_family ] &&
	   [ $saddr ] && [ $saddr = $e_saddr ] &&
	   [ $sport ] && [ $sport = $e_sport ]; then
		stdbuf -o0 -e0 printf "[OK]\n"
		return 0
	fi
	stdbuf -o0 -e0 printf "[FAIL]\n"
	exit 1
	check_expected "type" "family" "saddr" "sport"
}

test_listener()
{
	print_title "Listener tests"

	# Capture events on the network namespace running the client
	:>$client_evts

@@ -902,8 +931,10 @@ test_listener()
	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}

print_title "Make connections"
make_connection
make_connection "v6"

test_announce
test_remove
test_subflows