Commit e8ac615f authored by Jakub Kicinski
Browse files

Merge branch 'ipv6-fix-socket-connection-with-dscp-fib-rules'

Guillaume Nault says:

====================
ipv6: Fix socket connection with DSCP fib-rules.

The "flowlabel" field of struct flowi6 is used to store both the actual
flow label and the DS Field (or Traffic Class). However the .connect
handlers of datagram and TCP sockets don't set the DS Field part when
doing their route lookup. This breaks fib-rules that match on DSCP.
====================

Link: https://lore.kernel.org/r/cover.1675875519.git.gnault@redhat.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 6e16e67a c21a20d9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = inet->inet_dport;
	fl6->fl6_sport = inet->inet_sport;
	fl6->flowlabel = np->flow_label;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_uid = sk->sk_uid;

	if (!oif)
+1 −0
Original line number Diff line number Diff line
@@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
+127 −1
Original line number Diff line number Diff line
@@ -10,8 +10,10 @@ ret=0

PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
IP="ip -netns testns"
IP_PEER="ip -netns peerns"

RTABLE=100
RTABLE_PEER=101
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
TESTS="fib_rule6 fib_rule4"
TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"

SELFTEST_PATH=""

log_test()
{
@@ -52,6 +56,31 @@ log_section()
	echo "######################################################################"
}

# Make sure the nettest tool can be found through PATH.
#
# If nettest is not reachable, the directory containing this script is
# appended to PATH (once; SELFTEST_PATH remembers that it was done) and the
# lookup is retried.
#
# Returns 0 when nettest is available, 1 otherwise. On the first failed
# attempt, a hint is printed and the global "ret" is set to ${ksft_skip},
# unless "ret" already holds a non-zero value.
check_nettest()
{
	# "command -v" is a shell builtin, so the lookup does not depend on the
	# external "which" utility being installed.
	if command -v nettest > /dev/null 2>&1; then
		return 0
	fi

	# Add the selftest directory to PATH if not already done
	if [ "${SELFTEST_PATH}" = "" ]; then
		# $0 is quoted so that a script path containing whitespace is
		# passed to dirname as a single argument.
		SELFTEST_PATH="$(dirname "$0")"
		PATH="${PATH}:${SELFTEST_PATH}"

		# Now retry with the new path
		if command -v nettest > /dev/null 2>&1; then
			return 0
		fi

		# Do not overwrite an already recorded failure status.
		if [ "${ret}" -eq 0 ]; then
			ret="${ksft_skip}"
		fi
		echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
	fi

	return 1
}

setup()
{
	set -e
@@ -72,6 +101,39 @@ cleanup()
	ip netns del testns
}

# Create the "peerns" network namespace and link it to "testns" through a
# veth pair (veth0 in testns, veth1 in peerns).
#
# Both ends get point-to-point IPv4 and IPv6 addresses. The peer also gets one
# extra IPv4 and one extra IPv6 address on its loopback device; this function
# installs the routes towards those two addresses in table $RTABLE_PEER only.
#
# Runs under "set -e": any failing command aborts the script.
setup_peer()
{
	local veth0_ip4=192.0.2.10
	local veth1_ip4=192.0.2.11
	local veth0_ip6=2001:db8::10
	local veth1_ip6=2001:db8::11
	local peer_lo_ip4=198.51.100.11
	local peer_lo_ip6=2001:db8::1:11

	set -e

	# Peer namespace with its loopback device up
	ip netns add peerns
	$IP_PEER link set dev lo up

	# veth pair between the two namespaces
	ip link add name veth0 netns testns type veth peer name veth1 netns peerns
	$IP link set dev veth0 up
	$IP_PEER link set dev veth1 up

	# Point-to-point IPv4 addressing
	$IP address add "${veth0_ip4}" peer "${veth1_ip4}/32" dev veth0
	$IP_PEER address add "${veth1_ip4}" peer "${veth0_ip4}/32" dev veth1

	# Point-to-point IPv6 addressing
	$IP address add "${veth0_ip6}" peer "${veth1_ip6}/128" dev veth0 nodad
	$IP_PEER address add "${veth1_ip6}" peer "${veth0_ip6}/128" dev veth1 nodad

	# Extra IPv4 address on the peer's loopback, routed via $RTABLE_PEER
	$IP_PEER address add "${peer_lo_ip4}/32" dev lo
	$IP route add table $RTABLE_PEER "${peer_lo_ip4}/32" via "${veth1_ip4}"

	# Extra IPv6 address on the peer's loopback, routed via $RTABLE_PEER
	$IP_PEER address add "${peer_lo_ip6}/128" dev lo
	$IP route add table $RTABLE_PEER "${peer_lo_ip6}/128" via "${veth1_ip6}"

	set +e
}

# Tear down what setup_peer() created: delete the veth0 link from testns and
# remove the peerns namespace.
cleanup_peer()
{
	# NOTE(review): veth1 is presumably removed along with veth0, since
	# both are ends of the same veth pair -- confirm.
	$IP link del dev veth0
	ip netns del peerns
}

fib_check_iproute_support()
{
	ip rule help 2>&1 | grep -q $1
@@ -190,6 +252,37 @@ fib_rule6_test()
	fi
}

# Check that UDPv6 and TCPv6 sockets honour their IPV6_TCLASS option, both
# when the socket gets connected and when packets are sent.
#
# A "dsfield 0x04" IPv6 rule points to table $RTABLE_PEER. Each nettest run
# is expected to succeed (exit status 0). The test is skipped when nettest
# is not available.
fib_rule6_connect_test()
{
	local dsfield
	local nettest_opts="-q -6 -B -t 5 -N testns -O peerns"
	local peer_lo6=2001:db8::1:11

	check_nettest || {
		echo "SKIP: Could not run test without nettest tool"
		return
	}

	setup_peer
	$IP -6 rule add dsfield 0x04 table $RTABLE_PEER

	# The base DS Field value (0x04) is tried with every ECN codepoint
	# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3): the ECN bits must not
	# change the outcome.
	for dsfield in 0x04 0x05 0x06 0x07; do
		# Connected UDP socket
		nettest ${nettest_opts} -U -D -Q "${dsfield}" \
			-l "${peer_lo6}" -r "${peer_lo6}"
		log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"

		# TCP socket
		nettest ${nettest_opts} -Q "${dsfield}" \
			-l "${peer_lo6}" -r "${peer_lo6}"
		log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
	done

	$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
	cleanup_peer
}

fib_rule4_del()
{
	$IP rule del $1
@@ -296,6 +389,37 @@ fib_rule4_test()
	fi
}

# Check that UDPv4 and TCPv4 sockets honour their IP_TOS option, both when
# the socket gets connected and when packets are sent.
#
# A "dsfield 0x04" IPv4 rule points to table $RTABLE_PEER. Each nettest run
# is expected to succeed (exit status 0). The test is skipped when nettest
# is not available.
fib_rule4_connect_test()
{
	local dsfield
	local nettest_opts="-q -B -t 5 -N testns -O peerns"
	local peer_lo4=198.51.100.11

	check_nettest || {
		echo "SKIP: Could not run test without nettest tool"
		return
	}

	setup_peer
	$IP -4 rule add dsfield 0x04 table $RTABLE_PEER

	# The base DS Field value (0x04) is tried with every ECN codepoint
	# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3): the ECN bits must not
	# change the outcome.
	for dsfield in 0x04 0x05 0x06 0x07; do
		# Connected UDP socket
		nettest ${nettest_opts} -D -U -Q "${dsfield}" \
			-l "${peer_lo4}" -r "${peer_lo4}"
		log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})"

		# TCP socket
		nettest ${nettest_opts} -Q "${dsfield}" \
			-l "${peer_lo4}" -r "${peer_lo4}"
		log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
	done

	$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
	cleanup_peer
}

run_fibrule_tests()
{
	log_section "IPv4 fib rule"
@@ -345,6 +469,8 @@ do
	case $t in
	fib_rule6_test|fib_rule6)		fib_rule6_test;;
	fib_rule4_test|fib_rule4)		fib_rule4_test;;
	fib_rule6_connect_test|fib_rule6_connect)	fib_rule6_connect_test;;
	fib_rule4_connect_test|fib_rule4_connect)	fib_rule4_connect_test;;

	help) echo "Test names: $TESTS"; exit 0;;

+50 −1
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ struct sock_args {
	int use_setsockopt;
	int use_freebind;
	int use_cmsg;
	uint8_t dsfield;
	const char *dev;
	const char *server_dev;
	int ifindex;
@@ -580,6 +581,36 @@ static int set_reuseaddr(int sd)
	return rc;
}

/*
 * Apply a DS Field value to socket @sd.
 *
 * @version selects the socket option: IP_TOS for AF_INET, IPV6_TCLASS for
 * AF_INET6. A @dsfield of zero leaves the socket untouched.
 *
 * Returns 0 on success (or when there is nothing to set), -1 when
 * setsockopt() fails or @version is neither AF_INET nor AF_INET6.
 */
static int set_dsfield(int sd, int version, int dsfield)
{
	int rc;

	/* Nothing requested: no socket option is set */
	if (dsfield == 0)
		return 0;

	if (version == AF_INET) {
		rc = setsockopt(sd, SOL_IP, IP_TOS, &dsfield, sizeof(dsfield));
		if (rc < 0) {
			log_err_errno("setsockopt(IP_TOS)");
			return -1;
		}

		return 0;
	}

	if (version == AF_INET6) {
		rc = setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield,
				sizeof(dsfield));
		if (rc < 0) {
			log_err_errno("setsockopt(IPV6_TCLASS)");
			return -1;
		}

		return 0;
	}

	log_error("Invalid address family\n");
	return -1;
}

static int str_to_uint(const char *str, int min, int max, unsigned int *value)
{
	int number;
@@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server)
		       (char *)&one, sizeof(one)) < 0)
		log_err_errno("Setting SO_BROADCAST error");

	if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
		goto out_err;

	if (args->dev && bind_to_device(sd, args->dev) != 0)
		goto out_err;
	else if (args->use_setsockopt &&
@@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args)
	if (set_reuseport(sd) != 0)
		goto err;

	if (set_dsfield(sd, args->version, args->dsfield) != 0)
		goto err;

	if (args->dev && bind_to_device(sd, args->dev) != 0)
		goto err;
	else if (args->use_setsockopt &&
@@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
	if (set_reuseport(sd) != 0)
		goto err;

	if (set_dsfield(sd, args->version, args->dsfield) != 0)
		goto err;

	if (args->dev && bind_to_device(sd, args->dev) != 0)
		goto err;
	else if (args->use_setsockopt &&
@@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
	return client_status;
}

#define GETOPT_STR  "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
#define GETOPT_STR  "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
#define OPT_FORCE_BIND_KEY_IFINDEX 1001
#define OPT_NO_BIND_KEY_IFINDEX 1002

@@ -1893,6 +1933,8 @@ static void print_usage(char *prog)
	"    -D|R          datagram (D) / raw (R) socket (default stream)\n"
	"    -l addr       local address to bind to in server mode\n"
	"    -c addr       local address to bind to in client mode\n"
	"    -Q dsfield    DS Field value of the socket (the IP_TOS or\n"
	"                  IPV6_TCLASS socket option)\n"
	"    -x            configure XFRM policy on socket\n"
	"\n"
	"    -d dev        bind socket to given device name\n"
@@ -1971,6 +2013,13 @@ int main(int argc, char *argv[])
			args.has_local_ip = 1;
			args.client_local_addr_str = optarg;
			break;
		case 'Q':
			if (str_to_uint(optarg, 0, 255, &tmp) != 0) {
				fprintf(stderr, "Invalid DS Field\n");
				return 1;
			}
			args.dsfield = tmp;
			break;
		case 'p':
			if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
				fprintf(stderr, "Invalid port\n");