Commit b98057ef authored by Alexei Starovoitov
Browse files

Merge branch 'Add bpf_loop helper'



Joanne Koong says:

====================

This patchset adds a new helper, bpf_loop.

One of the complexities of using for loops in bpf programs is that the verifier
needs to ensure that in every possibility of the loop logic, the loop will always
terminate. As such, there is a limit on how many iterations the loop can do.

The bpf_loop helper moves the loop logic into the kernel and can thereby
guarantee that the loop will always terminate. The bpf_loop helper simplifies
a lot of the complexity the verifier needs to check, as well as removes the
constraint on the number of loops able to be run.

From the test results, we see that using bpf_loop in place
of the traditional for loop led to a decrease in verification time
and number of bpf instructions by ~99%. The benchmark results show
that as the number of iterations increases, the overhead per iteration
decreases.

The high-level overview of the patches -
Patch 1 - kernel-side + API changes for adding bpf_loop
Patch 2 - tests
Patch 3 - use bpf_loop in strobemeta + pyperf600 and measure verifier performance
Patch 4 - benchmark for throughput + latency of bpf_loop call

v3 -> v4:
~ Address nits: use usleep for triggering bpf programs, fix copyright style

v2 -> v3:
~ Rerun benchmarks on physical machine, update results
~ Propagate original error codes in the verifier

v1 -> v2:
~ Change helper name to bpf_loop (instead of bpf_for_each)
~ Set max nr_loops (~8 million loops) for bpf_loop call
~ Split tests + strobemeta/pyperf600 changes into two patches
~ Add new ops_report_final helper for outputting throughput and latency
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 88691e9e ec151037
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
extern const struct bpf_func_proto bpf_loop_proto;

const struct bpf_func_proto *tracing_prog_func_proto(
  enum bpf_func_id func_id, const struct bpf_prog *prog);
+25 −0
Original line number Diff line number Diff line
@@ -4957,6 +4957,30 @@ union bpf_attr {
 *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
 *		**-EBUSY** if failed to try lock mmap_lock.
 *		**-EINVAL** for invalid **flags**.
 *
 * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
 *	Description
 *		Call the **callback_fn** function up to **nr_loops** times,
 *		with **callback_ctx** as the context parameter.
 *		The **callback_fn** should be a static function and
 *		the **callback_ctx** should be a pointer to the stack.
 *		The **flags** is used to control certain aspects of the helper.
 *		Currently, the **flags** must be 0, and **nr_loops** is
 *		limited to 1 << 23 (~8 million) loops.
 *
 *		long (\*callback_fn)(u32 index, void \*ctx);
 *
 *		where **index** is the current index in the loop. The index
 *		is zero-indexed.
 *
 *		If **callback_fn** returns 0, the helper will continue to the next
 *		loop. If return value is 1, the helper will skip the rest of
 *		the loops and return. Other return values are not used now,
 *		and will be rejected by the verifier.
 *
 *	Return
 *		The number of loops performed, **-EINVAL** for invalid **flags**,
 *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -5140,6 +5164,7 @@ union bpf_attr {
	FN(skc_to_unix_sock),		\
	FN(kallsyms_lookup_name),	\
	FN(find_vma),			\
	FN(loop),			\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+35 −0
Original line number Diff line number Diff line
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};

/* Maximum number of loops a single bpf_loop() call may request
 * (1 << 23, ~8 million). A larger nr_loops is rejected with -E2BIG.
 */
#define MAX_LOOPS	BIT(23)

/* bpf_loop() helper: invoke callback_fn up to nr_loops times.
 *
 * Each invocation receives the zero-based iteration index as its first
 * argument and callback_ctx as its second; the remaining three callback
 * arguments are passed as 0.
 *
 * Returns -EINVAL if flags is non-zero, -E2BIG if nr_loops exceeds
 * MAX_LOOPS, otherwise the number of callback invocations performed
 * (including the one that asked to stop, if any).
 */
BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
	   u64, flags)
{
	bpf_callback_t cb = (bpf_callback_t)callback_fn;
	u32 done = 0;

	if (flags)
		return -EINVAL;
	if (nr_loops > MAX_LOOPS)
		return -E2BIG;

	while (done < nr_loops) {
		u64 rc = cb((u64)done, (u64)(long)callback_ctx, 0, 0, 0);

		/* the iteration that just ran counts towards the total */
		done++;

		/* callback result: 0 - keep going, non-zero - stop here */
		if (rc)
			break;
	}

	return done;
}

/* Helper prototype for bpf_loop(); the argument types below follow the
 * order of the BPF_CALL_4(bpf_loop, ...) parameters.
 */
const struct bpf_func_proto bpf_loop_proto = {
	.func		= bpf_loop,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,			/* nr_loops */
	.arg2_type	= ARG_PTR_TO_FUNC,		/* callback_fn */
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,	/* callback_ctx */
	.arg4_type	= ARG_ANYTHING,			/* flags */
};
+2 −0
Original line number Diff line number Diff line
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_loop:
		return &bpf_loop_proto;
	default:
		break;
	}
+54 −34
Original line number Diff line number Diff line
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
	return 0;
}

/* Set up the callee frame's registers for verifying a bpf_loop() callback.
 *
 * Helper:   bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
 *		      u64 flags);
 * Callback: callback_fn(u32 index, void *callback_ctx);
 *
 * Always returns 0.
 */
static int set_loop_callback_state(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx)
{
	struct bpf_reg_state *callee_regs = callee->regs;

	/* flag the callee frame as a callback function */
	callee->in_callback_fn = true;

	/* R1: the loop index, typed as a scalar */
	callee_regs[BPF_REG_1].type = SCALAR_VALUE;

	/* R2: callback_ctx, copied from the caller's third argument */
	callee_regs[BPF_REG_2] = caller->regs[BPF_REG_3];

	/* R3-R5: not passed to the callback */
	__mark_reg_not_init(env, &callee_regs[BPF_REG_3]);
	__mark_reg_not_init(env, &callee_regs[BPF_REG_4]);
	__mark_reg_not_init(env, &callee_regs[BPF_REG_5]);

	return 0;
}

static int set_timer_callback_state(struct bpf_verifier_env *env,
				    struct bpf_func_state *caller,
				    struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
			return err;
	}

	if (func_id == BPF_FUNC_tail_call) {
		err = check_reference_leak(env);
		if (err) {
			verbose(env, "tail_call would lead to reference leak\n");
			return err;
		}
	} else if (is_release_function(func_id)) {
	if (is_release_function(func_id)) {
		err = release_reference(env, meta.ref_obj_id);
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,41 +6490,46 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn

	regs = cur_regs(env);

	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_reference_leak(env);
		if (err) {
			verbose(env, "tail_call would lead to reference leak\n");
			return err;
		}
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
	if (func_id == BPF_FUNC_get_local_storage &&
	    !register_is_null(&regs[BPF_REG_2])) {
		if (!register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}

	if (func_id == BPF_FUNC_for_each_map_elem) {
		break;
	case BPF_FUNC_for_each_map_elem:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_map_elem_callback_state);
		if (err < 0)
			return -EINVAL;
	}

	if (func_id == BPF_FUNC_timer_set_callback) {
		break;
	case BPF_FUNC_timer_set_callback:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_timer_callback_state);
		if (err < 0)
			return -EINVAL;
	}

	if (func_id == BPF_FUNC_find_vma) {
		break;
	case BPF_FUNC_find_vma:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_find_vma_callback_state);
		if (err < 0)
			return -EINVAL;
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
					set_loop_callback_state);
		break;
	}

	if (func_id == BPF_FUNC_snprintf) {
		err = check_bpf_snprintf_call(env, regs);
		if (err < 0)
	if (err)
		return err;
	}

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
Loading