Commit cc0f8353 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf: add bpf_for_each_map_elem() helper'

Yonghong Song says:

====================

This patch set introduced bpf_for_each_map_elem() helper.
The helper permits bpf program iterates through all elements
for a particular map.

The work originally inspired by an internal discussion where
firewall rules are kept in a map and bpf prog wants to
check packet 5 tuples against all rules in the map.
A bounded loop can be used but it has a few drawbacks.
As the loop iteration goes up, verification time goes up too.
For really large maps, verification may fail.
A helper which abstracts out the loop itself will not have
verification time issue.

A recent discussion in [1] involves to iterate all hash map
elements in bpf program. Currently iterating all hashmap elements
in bpf program is not easy if key space is really big.
Having a helper to abstract out the loop itself is even more
meaningful.

The proposed helper signature looks like:
  long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)
where callback_fn is a static function and callback_ctx is
a piece of data allocated on the caller stack which can be
accessed by the callback_fn. The callback_fn signature might be
different for different maps. For example, for hash/array maps,
the signature is
  long callback_fn(map, key, val, callback_ctx)

In the rest of series, Patches 1/2/3/4 did some refactoring. Patch 5
implemented core kernel support for the helper. Patches 6 and 7
added hashmap and arraymap support. Patches 8/9 added libbpf
support. Patch 10 added bpftool support. Patches 11 and 12 added
selftests for hashmap and arraymap.

[1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/



Changelogs:
  v4 -> v5:
    - rebase on top of bpf-next.
  v3 -> v4:
    - better refactoring of check_func_call(), calculate subprogno outside
      of __check_func_call() helper. (Andrii)
    - better documentation (like the list of supported maps and their
      callback signatures) in uapi header. (Andrii)
    - implement and use ASSERT_LT in selftests. (Andrii)
    - a few other minor changes.
  v2 -> v3:
    - add comments in retrieve_ptr_limit(), which is in sanitize_ptr_alu(),
      to clarify the code is not executed for PTR_TO_MAP_KEY handling,
      but code is manually tested. (Alexei)
    - require BTF for callback function. (Alexei)
    - simplify hashmap/arraymap callback return handling as return value
      [0, 1] has been enforced by the verifier. (Alexei)
    - also mark global subprog (if used in ld_imm64) as RELO_SUBPROG_ADDR. (Andrii)
    - handle the condition to mark RELO_SUBPROG_ADDR properly. (Andrii)
    - make bpftool subprog insn offset dumping consist with pcrel calls. (Andrii)
  v1 -> v2:
    - setup callee frame in check_helper_call() and then proceed to verify
      helper return value as normal (Alexei)
    - use meta data to keep track of map/func pointer to avoid hard coding
      the register number (Alexei)
    - verify callback_fn return value range [0, 1]. (Alexei)
    - add migrate_{disable, enable} to ensure percpu value is the one
      bpf program expects to see. (Alexei)
    - change bpf_for_each_map_elem() return value to the number of iterated
      elements. (Andrii)
    - Change libbpf pseudo_func relo name to RELO_SUBPROG_ADDR and use
      more rigid checking for the relocation. (Andrii)
    - Better format to print out subprog address with bpftool. (Andrii)
    - Use bpf_prog_test_run to trigger bpf run, instead of bpf_iter. (Andrii)
    - Other misc changes.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 86fd1665 6b9e3331
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct bpf_local_storage;
struct bpf_local_storage_map;
struct kobject;
struct mem_cgroup;
struct bpf_func_state;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -129,6 +130,13 @@ struct bpf_map_ops {
	bool (*map_meta_equal)(const struct bpf_map *meta0,
			       const struct bpf_map *meta1);


	int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
					      struct bpf_func_state *caller,
					      struct bpf_func_state *callee);
	int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
				     void *callback_ctx, u64 flags);

	/* BTF name and id of struct allocated by map_alloc */
	const char * const map_btf_name;
	int *map_btf_id;
@@ -295,6 +303,8 @@ enum bpf_arg_type {
	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
	__BPF_ARG_TYPE_MAX,
};

@@ -411,6 +421,8 @@ enum bpf_reg_type {
	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
	PTR_TO_FUNC,		 /* reg points to a bpf program function */
	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
};

/* The information passed from prog-specific *_is_valid_access
@@ -1385,6 +1397,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
				struct bpf_link_info *info);

int map_set_for_each_callback_args(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee);

int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@ -1887,6 +1903,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto;
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
	enum bpf_func_id func_id, const struct bpf_prog *prog);
+3 −0
Original line number Diff line number Diff line
@@ -68,6 +68,8 @@ struct bpf_reg_state {
			unsigned long raw1;
			unsigned long raw2;
		} raw;

		u32 subprogno; /* for PTR_TO_FUNC */
	};
	/* For PTR_TO_PACKET, used to find other pointers with the same variable
	 * offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
	int acquired_refs;
	struct bpf_reference_state *refs;
	int allocated_stack;
	bool in_callback_fn;
	struct bpf_stack_state *stack;
};

+38 −0
Original line number Diff line number Diff line
@@ -393,6 +393,15 @@ enum bpf_link_type {
 *                   is struct/union.
 */
#define BPF_PSEUDO_BTF_ID	3
/* insn[0].src_reg:  BPF_PSEUDO_FUNC
 * insn[0].imm:      insn offset to the func
 * insn[1].imm:      0
 * insn[0].off:      0
 * insn[1].off:      0
 * ldimm64 rewrite:  address of the function
 * verifier type:    PTR_TO_FUNC.
 */
#define BPF_PSEUDO_FUNC		4

/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
 * offset to another bpf function
@@ -3909,6 +3918,34 @@ union bpf_attr {
 *		* **BPF_MTU_CHK_RET_FRAG_NEEDED**
 *		* **BPF_MTU_CHK_RET_SEGS_TOOBIG**
 *
 * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
 *	Description
 *		For each element in **map**, call **callback_fn** function with
 *		**map**, **callback_ctx** and other map-specific parameters.
 *		The **callback_fn** should be a static function and
 *		the **callback_ctx** should be a pointer to the stack.
 *		The **flags** is used to control certain aspects of the helper.
 *		Currently, the **flags** must be 0.
 *
 *		The following are a list of supported map types and their
 *		respective expected callback signatures:
 *
 *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
 *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
 *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
 *
 *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
 *
 *		For per_cpu maps, the map_value is the value on the cpu where the
 *		bpf_prog is running.
 *
 *		If **callback_fn** return 0, the helper will continue to the next
 *		element. If return value is 1, the helper will skip the rest of
 *		elements and return. Other return values are not used now.
 *
 *	Return
 *		The number of traversed map elements for success, **-EINVAL** for
 *		invalid **flags**.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -4075,6 +4112,7 @@ union bpf_attr {
	FN(ima_inode_hash),		\
	FN(sock_from_file),		\
	FN(check_mtu),			\
	FN(for_each_map_elem),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+40 −0
Original line number Diff line number Diff line
@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
};

static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array->value + array->elem_size * i;
		num_elems++;
		key = i;
		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
					(u64)(long)&key, (u64)(long)val,
					(u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &percpu_array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
+16 −0
Original line number Diff line number Diff line
@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
	 */
	return ret == 0 ? 0 : -EAGAIN;
}

BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
	   void *, callback_ctx, u64, flags)
{
	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}

const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.func		= bpf_for_each_map_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_FUNC,
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
Loading