Commit 64696c40 authored by Martin KaFai Lau, committed by Alexei Starovoitov

bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline



A struct_ops prog allows using bpf to implement the functions of a
kernel struct (e.g. the ops of a kernel module).  The current usage
is to implement tcp_congestion_ops.  The kernel does not call a
tcp-cc's ops (i.e. the bpf prog) recursively.
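
For context, a struct_ops tcp-cc prog looks roughly like the sketch
below.  It is illustrative only: the names are made up and required
ops such as '.undo_cwnd' are omitted for brevity.

  /* Illustrative struct_ops tcp-cc prog; not part of this patch. */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  char LICENSE[] SEC("license") = "GPL";

  SEC("struct_ops/sketch_ssthresh")
  __u32 BPF_PROG(sketch_ssthresh, struct sock *sk)
  {
  	const struct tcp_sock *tp = (const struct tcp_sock *)sk;
  	__u32 half = tp->snd_cwnd >> 1;

  	/* Reno-style halving; the caller relies on this value. */
  	return half > 2 ? half : 2;
  }

  SEC(".struct_ops")
  struct tcp_congestion_ops sketch_cc = {
  	.ssthresh	= (void *)sketch_ssthresh,
  	.name		= "sketch_cc",
  };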

The struct_ops trampoline currently shares the tracing trampoline's
enter/exit functions, which track prog->active to avoid recursion.
That check is needed for tracing progs.  However, it turns out a
struct_ops bpf prog can hit this prog->active check and be
unnecessarily skipped.  e.g. '.ssthresh' may run in_task() and then
be interrupted by a softirq that runs the same '.ssthresh'.  Skipping
the '.ssthresh' run ends up returning a random value to the caller.
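
For reference, the shared tracing enter path does roughly the
following (a simplified sketch of __bpf_prog_enter; not the verbatim
kernel code):

  static u64 sketch_prog_enter(struct bpf_prog *prog,
  			     struct bpf_tramp_run_ctx *run_ctx)
  {
  	rcu_read_lock();
  	migrate_disable();

  	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

  	/* Per-cpu recursion guard: if this prog is already active
  	 * on this cpu (e.g. a softirq interrupting in_task()),
  	 * returning 0 makes the trampoline skip the prog.
  	 */
  	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
  		bpf_prog_inc_misses_counter(prog);
  		return 0;
  	}

  	return bpf_prog_start_time();
  }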

The patch adds __bpf_prog_{enter,exit}_struct_ops for the
struct_ops trampoline.  They do not track prog->active
to detect recursion.

One exception is when the tcp-cc's '.init' ops does
bpf_setsockopt(TCP_CONGESTION) and then recurses into the same
'.init' ops.  This will be addressed in the following patches.
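
For illustration, the remaining recursion is of this shape (sketch
only; 'sketch_init' and the cc name are made up):

  SEC("struct_ops/sketch_init")
  void BPF_PROG(sketch_init, struct sock *sk)
  {
  	char name[] = "sketch_cc";

  	/* Setting TCP_CONGESTION from '.init' re-enters
  	 * tcp_init_congestion_control() and hence the same
  	 * '.init' ops.
  	 */
  	bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, name, sizeof(name));
  }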

Fixes: ca06f55b ("bpf: Add per-program recursion prevention mechanism")
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20220929070407.965581-2-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 8526f0d6
arch/x86/net/bpf_jit_comp.c  +3 −0
@@ -1836,6 +1836,9 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (p->aux->sleepable) {
 		enter = __bpf_prog_enter_sleepable;
 		exit = __bpf_prog_exit_sleepable;
+	} else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) {
+		enter = __bpf_prog_enter_struct_ops;
+		exit = __bpf_prog_exit_struct_ops;
 	} else if (p->expected_attach_type == BPF_LSM_CGROUP) {
 		enter = __bpf_prog_enter_lsm_cgroup;
 		exit = __bpf_prog_exit_lsm_cgroup;
include/linux/bpf.h  +4 −0
@@ -864,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
 					struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
 					struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+					struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+					struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);

kernel/bpf/trampoline.c  +23 −0
@@ -964,6 +964,29 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
 	rcu_read_unlock_trace();
 }
 
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+					struct bpf_tramp_run_ctx *run_ctx)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+	return bpf_prog_start_time();
+}
+
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+					struct bpf_tramp_run_ctx *run_ctx)
+	__releases(RCU)
+{
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+	update_prog_stats(prog, start);
+	migrate_enable();
+	rcu_read_unlock();
+}
+
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
 {
 	percpu_ref_get(&tr->pcref);