Commit bb7a4257 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Align BPF TCP CCs implementing cong_control() with non-BPF CCs'



Jörn-Thorben Hinz says:

====================

This series corrects some inconveniences for a BPF TCP CC that
implements and uses tcp_congestion_ops.cong_control(). Until now, such a
CC did not have all necessary write access to struct sock and
unnecessarily needed to implement cong_avoid().

v4:
 - Remove braces around single statements after if
 - Don’t check pointer passed to bpf_link__destroy()
v3:
 - Add a selftest writing sk_pacing_*
 - Add a selftest with incomplete tcp_congestion_ops
 - Add a selftest with unsupported get_info()
 - Remove an unused variable
 - Reword a comment about reg() in bpf_struct_ops_map_update_elem()
v2:
 - Drop redundant check for required functions and just rely on
   tcp_register_congestion_control() (Martin KaFai Lau)
====================

Reviewed-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 9676fecc f14a3f64
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
		goto unlock;
	}

	/* Error during st_ops->reg().  It is very unlikely since
	 * the above init_member() should have caught it earlier
	 * before reg().  The only possibility is if there was a race
	 * in registering the struct_ops (under the same name) to
	/* Error during st_ops->reg(). Can happen if this struct_ops needs to be
	 * verified as a whole, after all init_member() calls. Can also happen if
	 * there was a race in registering the struct_ops (under the same name) to
	 * a sub-system through different struct_ops's maps.
	 */
	set_memory_nx((long)st_map->image, 1);
+6 −33
Original line number Diff line number Diff line
@@ -14,18 +14,6 @@
/* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
extern struct bpf_struct_ops bpf_tcp_congestion_ops;

static u32 optional_ops[] = {
	offsetof(struct tcp_congestion_ops, init),
	offsetof(struct tcp_congestion_ops, release),
	offsetof(struct tcp_congestion_ops, set_state),
	offsetof(struct tcp_congestion_ops, cwnd_event),
	offsetof(struct tcp_congestion_ops, in_ack_event),
	offsetof(struct tcp_congestion_ops, pkts_acked),
	offsetof(struct tcp_congestion_ops, min_tso_segs),
	offsetof(struct tcp_congestion_ops, sndbuf_expand),
	offsetof(struct tcp_congestion_ops, cong_control),
};

static u32 unsupported_ops[] = {
	offsetof(struct tcp_congestion_ops, get_info),
};
@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
	return 0;
}

static bool is_optional(u32 member_offset)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
		if (member_offset == optional_ops[i])
			return true;
	}

	return false;
}

static bool is_unsupported(u32 member_offset)
{
	unsigned int i;
@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
	}

	switch (off) {
	case offsetof(struct sock, sk_pacing_rate):
		end = offsetofend(struct sock, sk_pacing_rate);
		break;
	case offsetof(struct sock, sk_pacing_status):
		end = offsetofend(struct sock, sk_pacing_status);
		break;
	case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
		end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
		break;
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{
	const struct tcp_congestion_ops *utcp_ca;
	struct tcp_congestion_ops *tcp_ca;
	int prog_fd;
	u32 moff;

	utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
		return 1;
	}

	if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
		return 0;

	/* Ensure bpf_prog is provided for compulsory func ptr */
	prog_fd = (int)(*(unsigned long *)(udata + moff));
	if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
		return -EINVAL;

	return 0;
}

+61 −0
Original line number Diff line number Diff line
@@ -9,6 +9,9 @@
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h"
#include "tcp_ca_write_sk_pacing.skel.h"
#include "tcp_ca_incompl_cong_ops.skel.h"
#include "tcp_ca_unsupp_cong_op.skel.h"

#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
	bpf_dctcp_release__destroy(rel_skel);
}

static void test_write_sk_pacing(void)
{
	struct tcp_ca_write_sk_pacing *skel;
	struct bpf_link *link;

	skel = tcp_ca_write_sk_pacing__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
	ASSERT_OK_PTR(link, "attach_struct_ops");

	bpf_link__destroy(link);
	tcp_ca_write_sk_pacing__destroy(skel);
}

static void test_incompl_cong_ops(void)
{
	struct tcp_ca_incompl_cong_ops *skel;
	struct bpf_link *link;

	skel = tcp_ca_incompl_cong_ops__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	/* That cong_avoid() and cong_control() are missing is only reported at
	 * this point:
	 */
	link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
	ASSERT_ERR_PTR(link, "attach_struct_ops");

	bpf_link__destroy(link);
	tcp_ca_incompl_cong_ops__destroy(skel);
}

static void test_unsupp_cong_op(void)
{
	libbpf_print_fn_t old_print_fn;
	struct tcp_ca_unsupp_cong_op *skel;

	err_str = "attach to unsupported member get_info";
	found = false;
	old_print_fn = libbpf_set_print(libbpf_debug_print);

	skel = tcp_ca_unsupp_cong_op__open_and_load();
	ASSERT_NULL(skel, "open_and_load");
	ASSERT_EQ(found, true, "expected_err_msg");

	tcp_ca_unsupp_cong_op__destroy(skel);
	libbpf_set_print(old_print_fn);
}

void test_bpf_tcp_ca(void)
{
	if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
		test_dctcp_fallback();
	if (test__start_subtest("rel_setsockopt"))
		test_rel_setsockopt();
	if (test__start_subtest("write_sk_pacing"))
		test_write_sk_pacing();
	if (test__start_subtest("incompl_cong_ops"))
		test_incompl_cong_ops();
	if (test__start_subtest("unsupp_cong_op"))
		test_unsupp_cong_op();
}
+35 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include "vmlinux.h"

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}

SEC("struct_ops/incompl_cong_ops_ssthresh")
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
{
	return tcp_sk(sk)->snd_ssthresh;
}

SEC("struct_ops/incompl_cong_ops_undo_cwnd")
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
{
	return tcp_sk(sk)->snd_cwnd;
}

SEC(".struct_ops")
struct tcp_congestion_ops incompl_cong_ops = {
	/* Intentionally leaving out any of the required cong_avoid() and
	 * cong_control() here.
	 */
	.ssthresh = (void *)incompl_cong_ops_ssthresh,
	.undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
	.name = "bpf_incompl_ops",
};
+21 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include "vmlinux.h"

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("struct_ops/unsupp_cong_op_get_info")
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
		union tcp_cc_info *info)
{
	return 0;
}

SEC(".struct_ops")
struct tcp_congestion_ops unsupp_cong_op = {
	.get_info = (void *)unsupp_cong_op_get_info,
	.name = "bpf_unsupp_op",
};
Loading