Commit 31123c03 authored by Martin KaFai Lau, committed by Alexei Starovoitov
Browse files

selftests/bpf: bpf_setsockopt tests



This patch adds tests to exercise optnames that are allowed
in bpf_setsockopt().

Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220817061847.4182339-1-kafai@fb.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 7e41df5d
Loading
Loading
Loading
Loading
+125 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */

#define _GNU_SOURCE
#include <sched.h>
#include <linux/socket.h>
#include <net/if.h>

#include "test_progs.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"

#include "setget_sockopt.skel.h"

#define CG_NAME "/setget-sockopt-test"

/* Loopback addresses the test servers bind to, one per address family. */
static const char addr4_str[] = "127.0.0.1";
static const char addr6_str[] = "::1";
/* BPF skeleton used by the helpers below; opened in test_setget_sockopt(). */
static struct setget_sockopt *skel;
/* fd returned by test__join_cgroup(CG_NAME); the BPF programs attach to it. */
static int cg_fd;

/*
 * Move the calling process into a fresh network namespace, bring "lo" up and
 * create the binddevtest1/binddevtest2 veth pair with binddevtest1 up.
 *
 * The steps run in order and stop at the first one that fails.
 *
 * Returns 0 when every step succeeded, -1 otherwise.
 */
static int create_netns(void)
{
	int ok;

	/* && short-circuits, so a failed step prevents all later ones. */
	ok = ASSERT_OK(unshare(CLONE_NEWNET), "create netns") &&
	     ASSERT_OK(system("ip link set dev lo up"), "set lo up") &&
	     ASSERT_OK(system("ip link add dev binddevtest1 type veth peer name binddevtest2"),
		       "add veth") &&
	     ASSERT_OK(system("ip link set dev binddevtest1 up"),
		       "bring veth up");

	return ok ? 0 : -1;
}

/*
 * Run one TCP listen + connect cycle on the loopback address of @family and
 * check the counters that the BPF programs bumped while it ran.
 */
static void test_tcp(int family)
{
	const char *addr = (family == AF_INET6) ? addr6_str : addr4_str;
	struct setget_sockopt__bss *counters = skel->bss;
	int listen_fd, conn_fd, connected;

	/* Zero the counters so the checks below only see this run. */
	memset(counters, 0, sizeof(*counters));

	listen_fd = start_server(family, SOCK_STREAM, addr, 0, 0);
	if (!ASSERT_GE(listen_fd, 0, "start_server"))
		return;

	conn_fd = connect_to_fd(listen_fd, 0);
	connected = ASSERT_GE(conn_fd, 0, "connect_to_fd_server");

	/* The listener is closed whether or not the connect worked. */
	close(listen_fd);
	if (!connected)
		return;
	close(conn_fd);

	/* One hit per sockops callback; two sockets were created. */
	ASSERT_EQ(counters->nr_listen, 1, "nr_listen");
	ASSERT_EQ(counters->nr_connect, 1, "nr_connect");
	ASSERT_EQ(counters->nr_active, 1, "nr_active");
	ASSERT_EQ(counters->nr_passive, 1, "nr_passive");
	ASSERT_EQ(counters->nr_socket_post_create, 2, "nr_socket_post_create");
	ASSERT_EQ(counters->nr_binddev, 2, "nr_bind");
}

/*
 * Create (and immediately close) one UDP server socket on the loopback
 * address of @family and check the socket_post_create counters.
 */
static void test_udp(int family)
{
	const char *addr = (family == AF_INET6) ? addr6_str : addr4_str;
	struct setget_sockopt__bss *counters = skel->bss;
	int udp_fd;

	/* Zero the counters so the checks below only see this run. */
	memset(counters, 0, sizeof(*counters));

	udp_fd = start_server(family, SOCK_DGRAM, addr, 0, 0);
	if (!ASSERT_GE(udp_fd, 0, "start_server"))
		return;
	close(udp_fd);

	ASSERT_GE(counters->nr_socket_post_create, 1, "nr_socket_post_create");
	ASSERT_EQ(counters->nr_binddev, 1, "nr_bind");
}

/*
 * Test entry point: join the CG_NAME cgroup, set up a private netns with a
 * veth device, load the BPF object with the veth name/ifindex filled into
 * its rodata, attach both programs to the cgroup and run the TCP and UDP
 * scenarios for IPv6 and IPv4.
 */
void test_setget_sockopt(void)
{
	static const char veth_name[] = "binddevtest1";
	static const int families[] = { AF_INET6, AF_INET };
	const int nr_families = sizeof(families) / sizeof(families[0]);
	int i;

	cg_fd = test__join_cgroup(CG_NAME);
	if (cg_fd < 0)
		return;

	if (create_netns())
		goto done;

	skel = setget_sockopt__open();
	if (!ASSERT_OK_PTR(skel, "open skel"))
		goto done;

	/* rodata must be filled in between open and load. */
	strcpy(skel->rodata->veth, veth_name);
	skel->rodata->veth_ifindex = if_nametoindex(veth_name);
	if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex"))
		goto done;

	if (!ASSERT_OK(setget_sockopt__load(skel), "load skel"))
		goto done;

	skel->links.skops_sockopt =
		bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
	if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
		goto done;

	skel->links.socket_post_create =
		bpf_program__attach_cgroup(skel->progs.socket_post_create, cg_fd);
	if (!ASSERT_OK_PTR(skel->links.socket_post_create, "attach_cgroup"))
		goto done;

	/* All TCP runs first (v6 then v4), then all UDP runs. */
	for (i = 0; i < nr_families; i++)
		test_tcp(families[i]);
	for (i = 0; i < nr_families; i++)
		test_udp(families[i]);

done:
	setget_sockopt__destroy(skel);
	close(cg_fd);
}
+30 −1
Original line number Diff line number Diff line
@@ -6,13 +6,40 @@
#define AF_INET6		10

#define SOL_SOCKET		1
#define SO_REUSEADDR		2
#define SO_SNDBUF		7
#define __SO_ACCEPTCON		(1 << 16)
#define SO_RCVBUF		8
#define SO_KEEPALIVE		9
#define SO_PRIORITY		12
#define SO_REUSEPORT		15
#define SO_RCVLOWAT		18
#define SO_BINDTODEVICE		25
#define SO_MARK			36
#define SO_MAX_PACING_RATE	47
#define SO_BINDTOIFINDEX	62
#define SO_TXREHASH		74
#define __SO_ACCEPTCON		(1 << 16)

#define IP_TOS			1

#define IPV6_TCLASS		67
#define IPV6_AUTOFLOWLABEL	70

#define SOL_TCP			6
#define TCP_NODELAY		1
#define TCP_MAXSEG		2
#define TCP_KEEPIDLE		4
#define TCP_KEEPINTVL		5
#define TCP_KEEPCNT		6
#define TCP_SYNCNT		7
#define TCP_WINDOW_CLAMP	10
#define TCP_CONGESTION		13
#define TCP_THIN_LINEAR_TIMEOUTS	16
#define TCP_USER_TIMEOUT	18
#define TCP_NOTSENT_LOWAT	25
#define TCP_SAVE_SYN		27
#define TCP_CA_NAME_MAX		16
#define TCP_NAGLE_OFF		1

#define ICSK_TIME_RETRANS	1
#define ICSK_TIME_PROBE0	3
@@ -49,6 +76,8 @@
#define sk_state		__sk_common.skc_state
#define sk_v6_daddr		__sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
#define sk_flags		__sk_common.skc_flags
#define sk_reuse		__sk_common.skc_reuse

#define s6_addr32		in6_u.u6_addr32

+451 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */

#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

/*
 * Kernel CONFIG_HZ, resolved through __kconfig. __bpf_getsockopt() divides
 * tp->keepalive_time and tp->keepalive_intvl by it.
 */
extern unsigned long CONFIG_HZ __kconfig;

/* Name and ifindex of the device used by binddev_test(). */
const volatile char veth[IFNAMSIZ];
const volatile int veth_ifindex;

/*
 * Success counters. Each one is incremented by a program in this file when
 * the corresponding test run returned 0.
 */
int nr_listen;
int nr_passive;
int nr_active;
int nr_connect;
int nr_binddev;
int nr_socket_post_create;

/*
 * One table entry describing how to exercise a single socket option.
 *
 * Entries with .flip set are run by bpf_test_sockopt_flip(), all others by
 * bpf_test_sockopt_int(). An entry with .opt == 0 terminates a table.
 */
struct sockopt_test {
	int opt;		/* option name; 0 marks the end of a table */
	int new;		/* value written by the int test */
	int restore;		/* if non-zero, written back instead of the old value */
	int expected;		/* value the int test expects to read back */
	int tcp_expected;	/* if non-zero, replaces .expected on SOCK_STREAM sockets */
	unsigned int flip:1;	/* boolean option: test by toggling the current value */
};

static const char cubic_cc[] = "cubic";
static const char reno_cc[] = "reno";

/*
 * SOL_SOCKET options, walked by bpf_test_socket_sockopt().
 * For SO_SNDBUF and SO_RCVBUF the expected readback is twice the value set.
 */
static const struct sockopt_test sol_socket_tests[] = {
	{ .opt = SO_REUSEADDR, .flip = 1, },
	{ .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, },
	{ .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, },
	{ .opt = SO_KEEPALIVE, .flip = 1, },
	{ .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_REUSEPORT, .flip = 1, },
	{ .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, },
	{ .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, },
	{ .opt = SO_TXREHASH, .flip = 1, },
	{ .opt = 0, },	/* terminator */
};

/*
 * IPPROTO_TCP options, walked by bpf_test_tcp_sockopt().
 * TCP_CONGESTION carries no values here because that callback handles it
 * separately as a string option.
 */
static const struct sockopt_test sol_tcp_tests[] = {
	{ .opt = TCP_NODELAY, .flip = 1, },
	{ .opt = TCP_MAXSEG, .new = 1314, .expected = 1314, },
	{ .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
	{ .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
	{ .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
	{ .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, },
	{ .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, },
	{ .opt = TCP_CONGESTION, },
	{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
	{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
	{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
	{ .opt = TCP_SAVE_SYN, .new = 1, .expected = 1, },
	{ .opt = 0, },	/* terminator */
};

/*
 * IPPROTO_IP options, walked by bpf_test_ip_sockopt().
 * IP_TOS is expected to read back as 0xe0 on SOCK_STREAM sockets and as
 * 0xe1 otherwise.
 */
static const struct sockopt_test sol_ip_tests[] = {
	{ .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
	{ .opt = 0, },	/* terminator */
};

/*
 * IPPROTO_IPV6 options, walked by bpf_test_ipv6_sockopt().
 * IPV6_TCLASS is expected to read back as 0xe0 on SOCK_STREAM sockets and
 * as 0xe1 otherwise.
 */
static const struct sockopt_test sol_ipv6_tests[] = {
	{ .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
	{ .opt = IPV6_AUTOFLOWLABEL, .flip = 1, },
	{ .opt = 0, },	/* terminator */
};

/* Arguments handed to the bpf_loop() callbacks by bpf_test_sockopt(). */
struct loop_ctx {
	void *ctx;		/* passed as first argument to bpf_{get,set}sockopt() */
	struct sock *sk;	/* kernel socket whose fields are read directly */
};

/*
 * Read the current value of socket option (@level, @opt) into *@optval.
 *
 * A fixed set of SOL_SOCKET, IPPROTO_TCP and IPPROTO_IPV6 options is read
 * straight from fields of @sk; every other (level, opt) pair is forwarded
 * to bpf_getsockopt() with @ctx, @optval and @optlen.
 * NOTE(review): presumably the directly-read options are ones that
 * bpf_getsockopt() does not return - confirm against the helper.
 *
 * Returns 0 after a direct field read, -1 when @sk cannot be cast to the
 * socket type needed for the read, and otherwise whatever bpf_getsockopt()
 * returns.
 */
static int __bpf_getsockopt(void *ctx, struct sock *sk,
			    int level, int opt, int *optval,
			    int optlen)
{
	if (level == SOL_SOCKET) {
		switch (opt) {
		case SO_REUSEADDR:
			*optval = !!BPF_CORE_READ_BITFIELD(sk, sk_reuse);
			break;
		case SO_KEEPALIVE:
			/* NOTE(review): bit 3 is presumably SOCK_KEEPOPEN - confirm against enum sock_flags */
			*optval = !!(sk->sk_flags & (1UL << 3));
			break;
		case SO_RCVLOWAT:
			*optval = sk->sk_rcvlowat;
			break;
		case SO_MAX_PACING_RATE:
			*optval = sk->sk_max_pacing_rate;
			break;
		default:
			return bpf_getsockopt(ctx, level, opt, optval, optlen);
		}
		return 0;
	}

	if (level == IPPROTO_TCP) {
		struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);

		if (!tp)
			return -1;

		switch (opt) {
		case TCP_NODELAY:
			*optval = !!(BPF_CORE_READ_BITFIELD(tp, nonagle) & TCP_NAGLE_OFF);
			break;
		case TCP_MAXSEG:
			*optval = tp->rx_opt.user_mss;
			break;
		case TCP_KEEPIDLE:
			/* the field is scaled down by CONFIG_HZ before being reported */
			*optval = tp->keepalive_time / CONFIG_HZ;
			break;
		case TCP_SYNCNT:
			*optval = tp->inet_conn.icsk_syn_retries;
			break;
		case TCP_KEEPINTVL:
			/* the field is scaled down by CONFIG_HZ before being reported */
			*optval = tp->keepalive_intvl / CONFIG_HZ;
			break;
		case TCP_KEEPCNT:
			*optval = tp->keepalive_probes;
			break;
		case TCP_WINDOW_CLAMP:
			*optval = tp->window_clamp;
			break;
		case TCP_THIN_LINEAR_TIMEOUTS:
			*optval = !!BPF_CORE_READ_BITFIELD(tp, thin_lto);
			break;
		case TCP_USER_TIMEOUT:
			*optval = tp->inet_conn.icsk_user_timeout;
			break;
		case TCP_NOTSENT_LOWAT:
			*optval = tp->notsent_lowat;
			break;
		case TCP_SAVE_SYN:
			*optval = BPF_CORE_READ_BITFIELD(tp, save_syn);
			break;
		default:
			return bpf_getsockopt(ctx, level, opt, optval, optlen);
		}
		return 0;
	}

	if (level == IPPROTO_IPV6) {
		switch (opt) {
		case IPV6_AUTOFLOWLABEL: {
			__u16 proto = sk->sk_protocol;
			struct inet_sock *inet_sk;

			/* TCP sockets go through the tcp_sock cast, everything else through udp6_sock */
			if (proto == IPPROTO_TCP)
				inet_sk = (struct inet_sock *)bpf_skc_to_tcp_sock(sk);
			else
				inet_sk = (struct inet_sock *)bpf_skc_to_udp6_sock(sk);

			if (!inet_sk)
				return -1;

			*optval = !!inet_sk->pinet6->autoflowlabel;
			break;
		}
		default:
			return bpf_getsockopt(ctx, level, opt, optval, optlen);
		}
		return 0;
	}

	return bpf_getsockopt(ctx, level, opt, optval, optlen);
}

/*
 * Exercise a boolean option: read it, write the logical negation, verify the
 * negation reads back, then write the original value again.
 *
 * Returns 0 when every step succeeded, 1 otherwise.
 */
static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
				 const struct sockopt_test *t,
				 int level)
{
	int optname = t->opt;
	int orig_val, flipped, readback;

	if (__bpf_getsockopt(ctx, sk, level, optname, &orig_val, sizeof(orig_val)))
		return 1;

	/* kernel initialized txrehash to 255 */
	if (level == SOL_SOCKET && optname == SO_TXREHASH &&
	    orig_val != 0 && orig_val != 1)
		orig_val = 1;

	flipped = !orig_val;
	if (bpf_setsockopt(ctx, level, optname, &flipped, sizeof(flipped)))
		return 1;

	if (__bpf_getsockopt(ctx, sk, level, optname, &readback, sizeof(readback)))
		return 1;
	if (readback != flipped)
		return 1;

	/* Put the (possibly normalised) original value back. */
	if (bpf_setsockopt(ctx, level, optname, &orig_val, sizeof(orig_val)))
		return 1;

	return 0;
}

/*
 * Exercise an integer option: the current value must differ from t->new;
 * write t->new, verify the expected value reads back, then write back either
 * t->restore (when non-zero) or the value read at the start.
 *
 * On SOCK_STREAM sockets a non-zero t->tcp_expected replaces t->expected.
 *
 * Returns 0 when every step succeeded, 1 otherwise.
 */
static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
				const struct sockopt_test *t,
				int level)
{
	int optname = t->opt;
	int to_set = t->new;
	int want, saved, readback;

	want = (sk->sk_type == SOCK_STREAM && t->tcp_expected) ?
	       t->tcp_expected : t->expected;

	if (__bpf_getsockopt(ctx, sk, level, optname, &saved, sizeof(saved)))
		return 1;
	/* The test is meaningless if the option already holds the new value. */
	if (saved == to_set)
		return 1;

	if (bpf_setsockopt(ctx, level, optname, &to_set, sizeof(to_set)))
		return 1;

	if (__bpf_getsockopt(ctx, sk, level, optname, &readback, sizeof(readback)))
		return 1;
	if (readback != want)
		return 1;

	if (t->restore)
		saved = t->restore;
	if (bpf_setsockopt(ctx, level, optname, &saved, sizeof(saved)))
		return 1;

	return 0;
}

/*
 * bpf_loop() callback: run entry @i of sol_socket_tests on lc->sk.
 * Returns 1 for an out-of-range index, for the terminating entry and for a
 * failed test; returns 0 when the entry's test passed.
 */
static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_socket_tests))
		return 1;

	entry = &sol_socket_tests[i];
	if (entry->opt == 0)
		return 1;

	return entry->flip ?
	       bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, SOL_SOCKET) :
	       bpf_test_sockopt_int(lc->ctx, lc->sk, entry, SOL_SOCKET);
}

/*
 * bpf_loop() callback: run entry @i of sol_ip_tests on lc->sk.
 * Returns 1 for an out-of-range index, for the terminating entry and for a
 * failed test; returns 0 when the entry's test passed.
 */
static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_ip_tests))
		return 1;

	entry = &sol_ip_tests[i];
	if (entry->opt == 0)
		return 1;

	return entry->flip ?
	       bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, IPPROTO_IP) :
	       bpf_test_sockopt_int(lc->ctx, lc->sk, entry, IPPROTO_IP);
}

/*
 * bpf_loop() callback: run entry @i of sol_ipv6_tests on lc->sk.
 * Returns 1 for an out-of-range index, for the terminating entry and for a
 * failed test; returns 0 when the entry's test passed.
 */
static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *entry;

	if (i >= ARRAY_SIZE(sol_ipv6_tests))
		return 1;

	entry = &sol_ipv6_tests[i];
	if (entry->opt == 0)
		return 1;

	return entry->flip ?
	       bpf_test_sockopt_flip(lc->ctx, lc->sk, entry, IPPROTO_IPV6) :
	       bpf_test_sockopt_int(lc->ctx, lc->sk, entry, IPPROTO_IPV6);
}

/*
 * bpf_loop() callback: run entry @i of sol_tcp_tests on lc->sk.
 *
 * TCP_CONGESTION is a string option and is handled here: the current
 * algorithm is read, switched to "reno" if it was "cubic" (otherwise to
 * "cubic"), verified, and then restored. All other entries go through the
 * generic flip/int helpers.
 *
 * Returns 1 for an out-of-range index, for the terminating entry and for a
 * failed test; returns 0 when the entry's test passed.
 */
static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
{
	const struct sockopt_test *t;
	struct sock *sk;
	void *ctx;

	if (i >= ARRAY_SIZE(sol_tcp_tests))
		return 1;

	t = &sol_tcp_tests[i];
	if (!t->opt)
		return 1;

	ctx = lc->ctx;
	sk = lc->sk;

	if (t->opt == TCP_CONGESTION) {
		char old_cc[16], tmp_cc[16];
		const char *new_cc;
		int new_cc_len;

		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
			return 1;

		/*
		 * Track the length of the chosen name explicitly: new_cc is a
		 * pointer, so sizeof(new_cc) would be the pointer size rather
		 * than the size of the string it points to and would make the
		 * helper read past the end of the name array.
		 */
		if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) {
			new_cc = reno_cc;
			new_cc_len = sizeof(reno_cc);
		} else {
			new_cc = cubic_cc;
			new_cc_len = sizeof(cubic_cc);
		}

		if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc,
				   new_cc_len))
			return 1;
		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc)))
			return 1;
		if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc))
			return 1;
		/* Restore the algorithm that was active before the test. */
		if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
			return 1;
		return 0;
	}

	if (t->flip)
		return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP);

	return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP);
}

static int bpf_test_sockopt(void *ctx, struct sock *sk)
{
	struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
	__u16 family, proto;
	int n;

	family = sk->sk_family;
	proto = sk->sk_protocol;

	n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
	if (n != ARRAY_SIZE(sol_socket_tests))
		return -1;

	if (proto == IPPROTO_TCP) {
		n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0);
		if (n != ARRAY_SIZE(sol_tcp_tests))
			return -1;
	}

	if (family == AF_INET) {
		n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0);
		if (n != ARRAY_SIZE(sol_ip_tests))
			return -1;
	} else {
		n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0);
		if (n != ARRAY_SIZE(sol_ipv6_tests))
			return -1;
	}

	return 0;
}

/*
 * Exercise SO_BINDTODEVICE and SO_BINDTOIFINDEX on @ctx:
 *   1. bind by name to veth, expect SO_BINDTOIFINDEX == veth_ifindex
 *   2. bind to the empty name, expect 0
 *   3. bind by index to veth_ifindex, expect veth_ifindex
 *   4. bind to index 0, expect 0
 *
 * Returns 0 when all four steps succeeded, -1 otherwise.
 */
static int binddev_test(void *ctx)
{
	const char empty_ifname[] = "";
	int bound, unbound = 0;

	/* 1. bind by device name */
	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
			   (void *)veth, sizeof(veth)))
		return -1;
	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   &bound, sizeof(int)))
		return -1;
	if (bound != veth_ifindex)
		return -1;

	/* 2. unbind through the empty device name */
	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
			   (void *)empty_ifname, sizeof(empty_ifname)))
		return -1;
	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   &bound, sizeof(int)))
		return -1;
	if (bound != 0)
		return -1;

	/* 3. bind by interface index */
	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   (void *)&veth_ifindex, sizeof(int)))
		return -1;
	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   &bound, sizeof(int)))
		return -1;
	if (bound != veth_ifindex)
		return -1;

	/* 4. unbind through index 0 */
	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   &unbound, sizeof(int)))
		return -1;
	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
			   &bound, sizeof(int)))
		return -1;
	if (bound != 0)
		return -1;

	return 0;
}

/*
 * lsm_cgroup hook on socket_post_create: run the option tables and the
 * bind-to-device test on the new socket and count each one that passed.
 * Always returns 1.
 */
SEC("lsm_cgroup/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family,
	     int type, int protocol, int kern)
{
	struct sock *sk = sock->sk;

	if (sk) {
		if (!bpf_test_sockopt(sk, sk))
			nr_socket_post_create++;
		if (!binddev_test(sk))
			nr_binddev++;
	}

	return 1;
}

/*
 * sockops program: on the listen, connect, active-established and
 * passive-established callbacks, run the option tables on the TCP socket
 * and bump the matching counter when they pass. Other ops are ignored.
 * Always returns 1.
 */
SEC("sockops")
int skops_sockopt(struct bpf_sock_ops *skops)
{
	struct bpf_sock *bpf_sk = skops->sk;
	struct sock *sk;

	if (!bpf_sk)
		return 1;

	/* Only sockets that can be cast to a tcp_sock are tested. */
	sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
	if (!sk)
		return 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		if (!bpf_test_sockopt(skops, sk))
			nr_listen++;
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		if (!bpf_test_sockopt(skops, sk))
			nr_connect++;
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		if (!bpf_test_sockopt(skops, sk))
			nr_active++;
		break;
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		if (!bpf_test_sockopt(skops, sk))
			nr_passive++;
		break;
	}

	return 1;
}

char _license[] SEC("license") = "GPL";