Commit fddbf4b6 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf: Support calling kernel function'



Martin KaFai says:

====================

This series adds support to allow bpf program calling kernel function.

The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()").  Those
functions have already been used by some kernel tcp-cc implementations.

This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation.  For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.

The tcp-cc kernel functions mentioned above will be whitelisted
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The whitelisted functions are not bound to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed.  The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.

Please see individual patch for details.

v2:
- Patch 2 in v1 is removed.  No need to support extern func in kernel.
  Changed libbpf to adjust the .ksyms datasec for extern func
  in patch 11. (Andrii)
- Name change: btf_check_func_arg_match() and btf_check_subprog_arg_match()
  in patch 2. (Andrii)
- Always set unreliable on any error in patch 2 since it does not
  matter. (Andrii)
- s/kern_func/kfunc/ and s/descriptor/desc/ in this set. (Andrii)
- Remove some unnecessary changes in disasm.h and disasm.c
  in patch 3.  In particular, no need to change the function
  signature in bpf_insn_revmap_call_t.  Also, removed the changes
  in print_bpf_insn().
- Fixed an issue in check_kfunc_call() when the calling kernel function
  returns a pointer in patch 3.  Added a selftest.
- Adjusted the verifier selftests due to the changes in the verifier log
  in patch 3.
- Fixed a comparison issue in kfunc_desc_cmp_by_imm() in patch 3. (Andrii)
- Name change: is_ldimm64_insn(),
  new helper: is_call_insn() in patch 10 (Andrii)
- Move btf_func_linkage() from btf.h to libbpf.c in patch 11. (Andrii)
- Fixed the linker error when CONFIG_BPF_SYSCALL is not defined.
  Moved the check_kfunc_call from filter.c to test_run.c in patch 14.
  (kernel test robot)
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 36e79851 7bd1590d
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -2346,3 +2346,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
					   tmp : orig_prog);
	return prog;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
+198 −0
Original line number Diff line number Diff line
@@ -1390,6 +1390,19 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
	*pprog = prog;
}

static void emit_push_r32(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}

static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
	u8 jmp_cond;
@@ -1459,6 +1472,174 @@ static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
	return jmp_cond;
}

/* i386 kernel compiles with "-mregparm=3".  From gcc document:
 *
 * ==== snippet ====
 * regparm (number)
 *	On x86-32 targets, the regparm attribute causes the compiler
 *	to pass arguments number one to (number) if they are of integral
 *	type in registers EAX, EDX, and ECX instead of on the stack.
 *	Functions that take a variable number of arguments continue
 *	to be passed all of their arguments on the stack.
 * ==== snippet ====
 *
 * The first three args of a function will be considered for
 * putting into the 32bit register EAX, EDX, and ECX.
 *
 * Two 32bit registers are used to pass a 64bit arg.
 *
 * For example,
 * void foo(u32 a, u32 b, u32 c, u32 d):
 *	u32 a: EAX
 *	u32 b: EDX
 *	u32 c: ECX
 *	u32 d: stack
 *
 * void foo(u64 a, u32 b, u32 c):
 *	u64 a: EAX (lo32) EDX (hi32)
 *	u32 b: ECX
 *	u32 c: stack
 *
 * void foo(u32 a, u64 b, u32 c):
 *	u32 a: EAX
 *	u64 b: EDX (lo32) ECX (hi32)
 *	u32 c: stack
 *
 * void foo(u32 a, u32 b, u64 c):
 *	u32 a: EAX
 *	u32 b: EDX
 *	u64 c: stack
 *
 * The return value will be stored in the EAX (and EDX for 64bit value).
 *
 * For example,
 * u32 foo(u32 a, u32 b, u32 c):
 *	return value: EAX
 *
 * u64 foo(u32 a, u32 b, u32 c):
 *	return value: EAX (lo32) EDX (hi32)
 *
 * Notes:
 *	The verifier only accepts function having integer and pointers
 *	as its args and return value, so it does not have
 *	struct-by-value.
 *
 * emit_kfunc_call() finds out the btf_func_model by calling
 * bpf_jit_find_kfunc_model().  A btf_func_model
 * has the details about the number of args, size of each arg,
 * and the size of the return value.
 *
 * It first decides how many args can be passed by EAX, EDX, and ECX.
 * That will decide what args should be pushed to the stack:
 * [first_stack_regno, last_stack_regno] are the bpf regnos
 * that should be pushed to the stack.
 *
 * It will first push all args to the stack because the push
 * will need to use ECX.  Then, it moves
 * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
 *
 * When emitting a call (0xE8), it needs to figure out
 * the jmp_offset relative to the jit-insn address immediately
 * following the call (0xE8) instruction.  At this point, it knows
 * the end of the jit-insn address after completely translated the
 * current (BPF_JMP | BPF_CALL) bpf-insn.  It is passed as "end_addr"
 * to the emit_kfunc_call().  Thus, it can learn the "immediate-follow-call"
 * address by figuring out how many jit-insn is generated between
 * the call (0xE8) and the end_addr:
 *	- 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
 *	  is arg pushed to the stack.
 *	- 0-2 jit-insns (3 bytes each) to handle the return value.
 */
static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
			   const struct bpf_insn *insn, u8 **pprog)
{
	const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
	int i, cnt = 0, first_stack_regno, last_stack_regno;
	int free_arg_regs = ARRAY_SIZE(arg_regs);
	const struct btf_func_model *fm;
	int bytes_in_stack = 0;
	const u8 *cur_arg_reg;
	u8 *prog = *pprog;
	s64 jmp_offset;

	fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
	if (!fm)
		return -EINVAL;

	first_stack_regno = BPF_REG_1;
	for (i = 0; i < fm->nr_args; i++) {
		int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;

		if (regs_needed > free_arg_regs)
			break;

		free_arg_regs -= regs_needed;
		first_stack_regno++;
	}

	/* Push the args to the stack */
	last_stack_regno = BPF_REG_0 + fm->nr_args;
	for (i = last_stack_regno; i >= first_stack_regno; i--) {
		if (fm->arg_size[i - 1] > sizeof(u32)) {
			emit_push_r64(bpf2ia32[i], &prog);
			bytes_in_stack += 8;
		} else {
			emit_push_r32(bpf2ia32[i], &prog);
			bytes_in_stack += 4;
		}
	}

	cur_arg_reg = &arg_regs[0];
	for (i = BPF_REG_1; i < first_stack_regno; i++) {
		/* mov e[adc]x,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
		      STACK_VAR(bpf2ia32[i][0]));
		if (fm->arg_size[i - 1] > sizeof(u32))
			/* mov e[adc]x,dword ptr [ebp+off] */
			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
			      STACK_VAR(bpf2ia32[i][1]));
	}

	if (bytes_in_stack)
		/* add esp,"bytes_in_stack" */
		end_addr -= 3;

	/* mov dword ptr [ebp+off],edx */
	if (fm->ret_size > sizeof(u32))
		end_addr -= 3;

	/* mov dword ptr [ebp+off],eax */
	if (fm->ret_size)
		end_addr -= 3;

	jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
	if (!is_simm32(jmp_offset)) {
		pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
		       jmp_offset);
		return -EINVAL;
	}

	EMIT1_off32(0xE8, jmp_offset);

	if (fm->ret_size)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(bpf2ia32[BPF_REG_0][0]));

	if (fm->ret_size > sizeof(u32))
		/* mov dword ptr [ebp+off],edx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(bpf2ia32[BPF_REG_0][1]));

	if (bytes_in_stack)
		/* add esp,"bytes_in_stack" */
		EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);

	*pprog = prog;

	return 0;
}

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
@@ -1888,6 +2069,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
			if (insn->src_reg == BPF_PSEUDO_CALL)
				goto notyet;

			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
				int err;

				err = emit_kfunc_call(bpf_prog,
						      image + addrs[i],
						      insn, &prog);

				if (err)
					return err;
				break;
			}

			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);

@@ -2393,3 +2586,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
					   tmp : orig_prog);
	return prog;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
+32 −2
Original line number Diff line number Diff line
@@ -427,6 +427,7 @@ enum bpf_reg_type {
	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
	PTR_TO_FUNC,		 /* reg points to a bpf program function */
	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
	__BPF_REG_TYPE_MAX,
};

/* The information passed from prog-specific *_is_valid_access
@@ -480,6 +481,7 @@ struct bpf_verifier_ops {
				 const struct btf_type *t, int off, int size,
				 enum bpf_access_type atype,
				 u32 *next_btf_id);
	bool (*check_kfunc_call)(u32 kfunc_btf_id);
};

struct bpf_prog_offload_ops {
@@ -796,6 +798,8 @@ struct btf_mod_pair {
	struct module *module;
};

struct bpf_kfunc_desc_tab;

struct bpf_prog_aux {
	atomic64_t refcnt;
	u32 used_map_cnt;
@@ -832,6 +836,7 @@ struct bpf_prog_aux {
	struct bpf_prog **func;
	void *jit_data; /* JIT specific data. arch dependent */
	struct bpf_jit_poke_descriptor *poke_tab;
	struct bpf_kfunc_desc_tab *kfunc_tab;
	u32 size_poke_tab;
	struct bpf_ksym ksym;
	const struct bpf_prog_ops *ops;
@@ -1527,6 +1532,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
				const union bpf_attr *kattr,
				union bpf_attr __user *uattr);
bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		    const struct bpf_prog *prog,
		    struct bpf_insn_access_aux *info);
@@ -1545,7 +1551,10 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
			   struct btf_func_model *m);

struct bpf_reg_state;
int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
				struct bpf_reg_state *regs);
int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
			      const struct btf *btf, u32 func_id,
			      struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
			  struct bpf_reg_state *reg);
@@ -1557,6 +1566,10 @@ struct bpf_link *bpf_link_by_id(u32 id);

const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
void bpf_task_storage_free(struct task_struct *task);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1719,6 +1732,11 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
	return -ENOTSUPP;
}

static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
{
	return false;
}

static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -1737,6 +1755,18 @@ bpf_base_func_proto(enum bpf_func_id func_id)
static inline void bpf_task_storage_free(struct task_struct *task)
{
}

static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
{
	return false;
}

static inline const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn)
{
	return NULL;
}
#endif /* CONFIG_BPF_SYSCALL */

void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
+6 −0
Original line number Diff line number Diff line
@@ -110,6 +110,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
		 u32 *type_size);
const char *btf_type_str(const struct btf_type *t);

#define for_each_member(i, struct_type, member)			\
	for (i = 0, member = btf_type_member(struct_type);	\
@@ -141,6 +142,11 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}

static inline bool btf_type_is_scalar(const struct btf_type *t)
{
	return btf_type_is_int(t) || btf_type_is_enum(t);
}

static inline bool btf_type_is_typedef(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
+2 −2
Original line number Diff line number Diff line
@@ -877,8 +877,7 @@ void bpf_prog_free_linfo(struct bpf_prog *prog);
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
			       const u32 *insn_to_jit_off);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
void bpf_prog_jit_attempt_done(struct bpf_prog *prog);

struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
@@ -919,6 +918,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_helper_changes_pkt_data(void *func);

static inline bool bpf_dump_raw_ok(const struct cred *cred)
Loading