Commit 2fae6771 authored by Martin KaFai Lau
Browse files

Merge branch 'cgroup/connect{4,6} programs for unprivileged ICMP ping'



YiFei Zhu says:

====================

Usually when a TCP/UDP connection is initiated, we can bind the socket
to a specific IP attached to an interface in a cgroup/connect hook.
But for pings, this is impossible, as the hook is not being called.

This series adds the invocation for cgroup/connect{4,6} programs to
unprivileged ICMP ping (i.e. ping sockets created with SOCK_DGRAM
IPPROTO_ICMP(V6) as opposed to SOCK_RAW). This also adds a test to
verify that the hooks are being called and invoking bpf_bind() from
within the hook actually binds the socket.

Patch 1 adds the invocation of the hook.
Patch 2 deduplicates write_sysctl in BPF test_progs.
Patch 3 adds the tests for this hook.

v1 -> v2:
* Added static to bindaddr_v6 in prog_tests/connect_ping.c
* Deduplicated much of the test logic in prog_tests/connect_ping.c
* Deduplicated write_sysctl() to test_progs.c

v2 -> v3:
* Renamed variable "obj" to "skel" for the BPF skeleton object in
  prog_tests/connect_ping.c

v3 -> v4:
* Fixed error path to destroy skel in prog_tests/connect_ping.c
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 665f5d35 58c449a9
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/export.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
#include <net/ping.h>
#include <net/udp.h>
@@ -295,6 +296,19 @@ void ping_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL_GPL(ping_close);

/* pre_connect handler for IPv4 ping sockets.
 *
 * Returns -EINVAL when addr_len is smaller than a struct sockaddr_in;
 * otherwise returns whatever BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK()
 * yields for this socket and address.
 */
static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			    int addr_len)
{
	/* Same length check as __ip4_datagram_connect(). It is repeated
	 * here so that the BPF program invoked below never looks at bytes
	 * beyond the addr_len the user actually supplied.
	 */
	if (addr_len >= sizeof(struct sockaddr_in))
		return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);

	return -EINVAL;
}

/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
				struct sockaddr *uaddr, int addr_len)
@@ -1009,6 +1023,7 @@ struct proto ping_prot = {
	.owner =	THIS_MODULE,
	.init =		ping_init_sock,
	.close =	ping_close,
	.pre_connect =	ping_pre_connect,
	.connect =	ip4_datagram_connect,
	.disconnect =	__udp_disconnect,
	.setsockopt =	ip_setsockopt,
+16 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <net/udp.h>
#include <net/transp_v6.h>
#include <linux/proc_fs.h>
#include <linux/bpf-cgroup.h>
#include <net/ping.h>

static void ping_v6_destroy(struct sock *sk)
@@ -49,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
	return 0;
}

/* pre_connect handler for IPv6 ping sockets.
 *
 * Returns -EINVAL when addr_len is below SIN6_LEN_RFC2133; otherwise
 * returns whatever BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK() yields for
 * this socket and address.
 */
static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			       int addr_len)
{
	/* Same length check as __ip6_datagram_connect(). It is repeated
	 * here so that the BPF program invoked below never looks at bytes
	 * beyond the addr_len the user actually supplied.
	 */
	if (addr_len >= SIN6_LEN_RFC2133)
		return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);

	return -EINVAL;
}

static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
@@ -191,6 +206,7 @@ struct proto pingv6_prot = {
	.init =		ping_init_sock,
	.close =	ping_close,
	.destroy =	ping_v6_destroy,
	.pre_connect =	ping_v6_pre_connect,
	.connect =	ip6_datagram_connect_v6_only,
	.disconnect =	__udp_disconnect,
	.setsockopt =	ipv6_setsockopt,
+0 −20
Original line number Diff line number Diff line
@@ -22,26 +22,6 @@ static __u32 duration;

#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"

/* Write the string @value to the sysctl file at path @sysctl.
 *
 * Failures to open the file, and writes that do not transfer exactly
 * strlen(value) bytes, are reported through CHECK().
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_sysctl(const char *sysctl, const char *value)
{
	int fd, err, len, saved_errno;

	fd = open(sysctl, O_WRONLY);
	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
		  sysctl, strerror(errno), errno))
		return -1;

	len = strlen(value);
	err = write(fd, value, len);
	/* Capture errno before close(), which may overwrite it and make
	 * the diagnostic below describe the wrong failure.
	 */
	saved_errno = errno;
	close(fd);
	if (CHECK(err != len, "write sysctl",
		  "write(%s, %s, %d): err:%d %s (%d)\n",
		  sysctl, value, len, err, strerror(saved_errno),
		  saved_errno))
		return -1;

	return 0;
}

static int prepare_netns(void)
{
	if (CHECK(unshare(CLONE_NEWNET), "create netns",
+178 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only

/*
 * Copyright 2022 Google LLC.
 */

#define _GNU_SOURCE
#include <sys/mount.h>

#include "test_progs.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"

#include "connect_ping.skel.h"

/* 2001:db8::1, spelled out as the 16 address bytes of a struct in6_addr
 * initializer. subtest() compares the getsockname() result against this
 * address when do_bind is set; test_connect_ping() adds the same address
 * to the lo device before running the subtests.
 */
#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
static const struct in6_addr bindaddr_v6 = BINDADDR_V6;

/* Run one connect_ping scenario.
 *
 * Creates a SOCK_DGRAM ICMP (AF_INET) or ICMPv6 (AF_INET6) socket,
 * connects it to the loopback address and then checks, via skel->bss,
 * that only the connect program of the matching family ran, that it ran
 * exactly once and that it reported no error. Finally the socket's local
 * address is read back with getsockname() and compared with the expected
 * one: 1.1.1.1 / 2001:db8::1 when @do_bind is set, loopback otherwise.
 *
 * @cgroup_fd: not used by this function.
 * @skel:      loaded skeleton; its bss is zeroed and do_bind is stored
 *             there before the socket is created.
 * @family:    AF_INET or AF_INET6; any other value fails the subtest.
 * @do_bind:   value copied into skel->bss->do_bind.
 */
static void subtest(int cgroup_fd, struct connect_ping *skel,
		    int family, int do_bind)
{
	struct sockaddr_in sa4 = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	struct sockaddr_in6 sa6 = {
		.sin6_family = AF_INET6,
		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
	};
	struct sockaddr *sa;
	socklen_t sa_len;
	int protocol;
	int sock_fd;

	switch (family) {
	case AF_INET:
		sa = (struct sockaddr *)&sa4;
		sa_len = sizeof(sa4);
		protocol = IPPROTO_ICMP;
		break;
	case AF_INET6:
		sa = (struct sockaddr *)&sa6;
		sa_len = sizeof(sa6);
		protocol = IPPROTO_ICMPV6;
		break;
	default:
		/* sa, sa_len and protocol are only assigned for the two
		 * families above; fail here rather than use them
		 * uninitialized.
		 */
		ASSERT_OK(-1, "unsupported-family");
		return;
	}

	/* Reset the counters shared with the BPF programs so that the
	 * checks below only see this subtest's invocations.
	 */
	memset(skel->bss, 0, sizeof(*skel->bss));
	skel->bss->do_bind = do_bind;

	sock_fd = socket(family, SOCK_DGRAM, protocol);
	if (!ASSERT_GE(sock_fd, 0, "sock-create"))
		return;

	if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect"))
		goto close_sock;

	if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0,
		       "invocations_v4"))
		goto close_sock;
	if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0,
		       "invocations_v6"))
		goto close_sock;
	if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error"))
		goto close_sock;

	/* sa still points at sa4/sa6, so the local address is written over
	 * the destination address used for connect() above.
	 */
	if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len),
		       "getsockname"))
		goto close_sock;

	switch (family) {
	case AF_INET:
		if (!ASSERT_EQ(sa4.sin_family, family, "sin_family"))
			goto close_sock;
		/* 0x01010101 is 1.1.1.1 */
		if (!ASSERT_EQ(sa4.sin_addr.s_addr,
			       htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK),
			       "sin_addr"))
			goto close_sock;
		break;
	case AF_INET6:
		if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family"))
			goto close_sock;
		if (!ASSERT_EQ(memcmp(&sa6.sin6_addr,
				      do_bind ? &bindaddr_v6 : &in6addr_loopback,
				      sizeof(sa6.sin6_addr)),
			       0, "sin6_addr"))
			goto close_sock;
		break;
	}

close_sock:
	close(sock_fd);
}

/* Test entry point.
 *
 * Moves into fresh network and mount namespaces, mounts private sysfs
 * and bpffs instances, brings up lo with the extra addresses 1.1.1.1 and
 * 2001:db8::1, writes "0 0" to the ping_group_range sysctl, joins the
 * "/connect_ping" cgroup and attaches both connect programs to it. Then
 * every scenario in the table below is run as a subtest.
 */
void test_connect_ping(void)
{
	/* Each scenario connects a ping socket of the given family to
	 * localhost and asserts that only the program of that family is
	 * called, and called exactly once.
	 */
	static const struct {
		const char *name;
		int family;
		int do_bind;
	} scenarios[] = {
		/* v4: bound address is the original loopback address. */
		{ "ipv4", AF_INET, 0 },
		/* v4: bound address is the address we explicitly bound. */
		{ "ipv4-bind", AF_INET, 1 },
		/* v6: bound address is the original loopback address. */
		{ "ipv6", AF_INET6, 0 },
		/* v6: bound address is the address we explicitly bound. */
		{ "ipv6-bind", AF_INET6, 1 },
	};
	struct connect_ping *skel;
	int cgroup_fd;
	size_t idx;

	if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare"))
		return;

	/* Make the original sysfs mount private first, so that the
	 * overmount done right after does not propagate to other mount
	 * namespaces.
	 */
	if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL),
		       "remount-private-sys"))
		return;
	if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL), "mount-sys"))
		return;
	if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL),
		       "mount-bpf"))
		goto clean_mount;

	if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up"))
		goto clean_mount;
	if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4"))
		goto clean_mount;
	if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"),
		       "lo-addr-v6"))
		goto clean_mount;
	if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0") != 0)
		goto clean_mount;

	cgroup_fd = test__join_cgroup("/connect_ping");
	if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
		goto clean_mount;

	skel = connect_ping__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel-load"))
		goto close_cgroup;

	/* The links are stored in the skeleton, so the skel_destroy path
	 * is the only cleanup they need here.
	 */
	skel->links.connect_v4_prog =
		bpf_program__attach_cgroup(skel->progs.connect_v4_prog,
					   cgroup_fd);
	if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4"))
		goto skel_destroy;
	skel->links.connect_v6_prog =
		bpf_program__attach_cgroup(skel->progs.connect_v6_prog,
					   cgroup_fd);
	if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6"))
		goto skel_destroy;

	for (idx = 0; idx < sizeof(scenarios) / sizeof(scenarios[0]); idx++) {
		if (!test__start_subtest(scenarios[idx].name))
			continue;
		subtest(cgroup_fd, skel, scenarios[idx].family,
			scenarios[idx].do_bind);
	}

skel_destroy:
	connect_ping__destroy(skel);

close_cgroup:
	close(cgroup_fd);

clean_mount:
	umount2("/sys", MNT_DETACH);
}
+0 −20
Original line number Diff line number Diff line
@@ -54,26 +54,6 @@ static int create_netns(void)
	return 0;
}

/* Write the string @value to the sysctl file at path @sysctl.
 *
 * Failures to open the file, and writes that do not transfer exactly
 * strlen(value) bytes, are reported through CHECK().
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_sysctl(const char *sysctl, const char *value)
{
	int fd, err, len, saved_errno;

	fd = open(sysctl, O_WRONLY);
	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
		  sysctl, strerror(errno), errno))
		return -1;

	len = strlen(value);
	err = write(fd, value, len);
	/* Capture errno before close(), which may overwrite it and make
	 * the diagnostic below describe the wrong failure.
	 */
	saved_errno = errno;
	close(fd);
	if (CHECK(err != len, "write sysctl",
		  "write(%s, %s): err:%d %s (%d)\n",
		  sysctl, value, err, strerror(saved_errno), saved_errno))
		return -1;

	return 0;
}

static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
{
	fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
Loading