Commit ca06f55b authored by Alexei Starovoitov's avatar Alexei Starovoitov Committed by Daniel Borkmann
Browse files

bpf: Add per-program recursion prevention mechanism



Since both sleepable and non-sleepable programs execute under migrate_disable
add recursion prevention mechanism to both types of programs when they're
executed via bpf trampoline.

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Acked-by: default avatarAndrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210210033634.62081-5-alexei.starovoitov@gmail.com
parent f2dd3b39
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -1740,8 +1740,11 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
			   struct bpf_prog *p, int stack_size, bool mod_ret)
{
	u8 *prog = *pprog;
	u8 *jmp_insn;
	int cnt = 0;

	/* arg1: mov rdi, progs[i] */
	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
	if (emit_call(&prog,
		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
		      __bpf_prog_enter, prog))
@@ -1749,6 +1752,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
	/* remember prog start time returned by __bpf_prog_enter */
	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);

	/* if (__bpf_prog_enter*(prog) == 0)
	 *	goto skip_exec_of_prog;
	 */
	EMIT3(0x48, 0x85, 0xC0);  /* test rax,rax */
	/* emit 2 nops that will be replaced with JE insn */
	jmp_insn = prog;
	emit_nops(&prog, 2);

	/* arg1: lea rdi, [rbp - stack_size] */
	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
	/* arg2: progs[i]->insnsi for interpreter */
@@ -1767,6 +1778,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
	if (mod_ret)
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

	/* replace 2 nops with JE insn, since jmp target is known */
	jmp_insn[0] = X86_JE;
	jmp_insn[1] = prog - jmp_insn - 2;

	/* arg1: mov rdi, progs[i] */
	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
	/* arg2: mov rsi, rbx <- start time in nsec */
+3 −3
Original line number Diff line number Diff line
@@ -529,7 +529,7 @@ struct btf_func_model {
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
 */
#define BPF_MAX_TRAMP_PROGS 40
#define BPF_MAX_TRAMP_PROGS 38

struct bpf_tramp_progs {
	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
				struct bpf_tramp_progs *tprogs,
				void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
u64 notrace __bpf_prog_enter_sleepable(void);
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);

struct bpf_ksym {
+1 −0
Original line number Diff line number Diff line
@@ -565,6 +565,7 @@ struct bpf_prog {
	u32			jited_len;	/* Size of jited insns in bytes */
	u8			tag[BPF_TAG_SIZE];
	struct bpf_prog_stats __percpu *stats;
	int __percpu		*active;
	unsigned int		(*bpf_func)(const void *ctx,
					    const struct bpf_insn *insn);
	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
+8 −0
Original line number Diff line number Diff line
@@ -91,6 +91,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
		vfree(fp);
		return NULL;
	}
	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
	if (!fp->active) {
		vfree(fp);
		kfree(aux);
		return NULL;
	}

	fp->pages = size / PAGE_SIZE;
	fp->aux = aux;
@@ -116,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)

	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
	if (!prog->stats) {
		free_percpu(prog->active);
		kfree(prog->aux);
		vfree(prog);
		return NULL;
@@ -253,6 +260,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
		kfree(fp->aux);
	}
	free_percpu(fp->stats);
	free_percpu(fp->active);
	vfree(fp);
}

+19 −4
Original line number Diff line number Diff line
@@ -381,13 +381,16 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
	mutex_unlock(&trampoline_mutex);
}

#define NO_START_TIME 0
#define NO_START_TIME 1
static u64 notrace bpf_prog_start_time(void)
{
	u64 start = NO_START_TIME;

	if (static_branch_unlikely(&bpf_stats_enabled_key))
	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		start = sched_clock();
		if (unlikely(!start))
			start = NO_START_TIME;
	}
	return start;
}

@@ -397,12 +400,20 @@ static u64 notrace bpf_prog_start_time(void)
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 *
 * __bpf_prog_enter returns:
 * 0 - skip execution of the bpf prog
 * 1 - execute bpf prog
 * [2..MAX_U64] - excute bpf prog and record execution time.
 *     This is start time.
 */
u64 notrace __bpf_prog_enter(void)
u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
	__acquires(RCU)
{
	rcu_read_lock();
	migrate_disable();
	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
		return 0;
	return bpf_prog_start_time();
}

@@ -430,21 +441,25 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
	__releases(RCU)
{
	update_prog_stats(prog, start);
	__this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock();
}

u64 notrace __bpf_prog_enter_sleepable(void)
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
{
	rcu_read_lock_trace();
	migrate_disable();
	might_fault();
	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1))
		return 0;
	return bpf_prog_start_time();
}

void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
{
	update_prog_stats(prog, start);
	__this_cpu_dec(*(prog->active));
	migrate_enable();
	rcu_read_unlock_trace();
}
Loading