Commit bc34dee6 authored by Joanne Koong, committed by Andrii Nakryiko
Browse files

bpf: Dynptr support for ring buffers



Currently, our only way of writing dynamically-sized data into a ring
buffer is through bpf_ringbuf_output but this incurs an extra memcpy
cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra
memcpy, but it can only safely support reservation sizes that are
statically known since the verifier cannot guarantee that the bpf
program won’t access memory outside the reserved space.

The bpf_dynptr abstraction allows for dynamically-sized ring buffer
reservations without the extra memcpy.

There are 3 new APIs:

long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr);
void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags);
void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags);

These closely follow the functionalities of the original ringbuf APIs.
For example, all ringbuffer dynptrs that have been reserved must be
either submitted or discarded before the program exits.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-4-joannelkoong@gmail.com
parent 263ae152
Loading
Loading
Loading
Loading
+14 −1
Original line number Diff line number Diff line
@@ -395,11 +395,14 @@ enum bpf_type_flag {
	/* DYNPTR points to memory local to the bpf program. */
	DYNPTR_TYPE_LOCAL	= BIT(8 + BPF_BASE_TYPE_BITS),

	/* DYNPTR points to a ringbuf record. */
	DYNPTR_TYPE_RINGBUF	= BIT(9 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_FLAG_MAX,
	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK	DYNPTR_TYPE_LOCAL
#define DYNPTR_TYPE_FLAG_MASK	(DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)
@@ -2231,6 +2234,9 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
extern const struct bpf_func_proto bpf_ringbuf_query_proto;
extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto;
extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto;
extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
@@ -2402,6 +2408,13 @@ enum bpf_dynptr_type {
	BPF_DYNPTR_TYPE_INVALID,
	/* Points to memory that is local to the bpf program */
	BPF_DYNPTR_TYPE_LOCAL,
	/* Underlying data is a ringbuf record */
	BPF_DYNPTR_TYPE_RINGBUF,
};

/*
 * Record *data* as the memory backing *ptr* and tag the dynptr with *type*.
 * NOTE(review): how *offset* and *size* are stored is not visible here;
 * confirm against the definition.
 */
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
		     enum bpf_dynptr_type type, u32 offset, u32 size);
/* Zero every field of *ptr*, leaving it with no backing data. */
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
/* Return -E2BIG if *size* exceeds DYNPTR_MAX_SIZE, 0 otherwise. */
int bpf_dynptr_check_size(u32 size);

#endif /* _LINUX_BPF_H */
+2 −0
Original line number Diff line number Diff line
@@ -100,6 +100,8 @@ struct bpf_reg_state {
	 * for the purpose of tracking that it's freed.
	 * For PTR_TO_SOCKET this is used to share which pointers retain the
	 * same reference to the socket, to determine proper reference freeing.
	 * For stack slots that are dynptrs, this is used to track references to
	 * the dynptr to determine proper reference freeing.
	 */
	u32 id;
	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
+35 −0
Original line number Diff line number Diff line
@@ -5189,6 +5189,38 @@ union bpf_attr {
 *	Return
 *		0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
 *		-EINVAL if flags is not 0.
 *
 * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
 *	Description
 *		Reserve *size* bytes of payload in a ring buffer *ringbuf*
 *		through the dynptr interface. *flags* must be 0.
 *
 *		Please note that a corresponding bpf_ringbuf_submit_dynptr or
 *		bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the
 *		reservation fails. This is enforced by the verifier.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
 *	Description
 *		Submit reserved ring buffer sample, pointed to by *ptr*,
 *		through the dynptr interface. This is a no-op if the dynptr is
 *		invalid/null.
 *
 *		For more information on *flags*, please see
 *		'bpf_ringbuf_submit'.
 *	Return
 *		Nothing. Always succeeds.
 *
 * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
 *	Description
 *		Discard reserved ring buffer sample through the dynptr
 *		interface. This is a no-op if the dynptr is invalid/null.
 *
 *		For more information on *flags*, please see
 *		'bpf_ringbuf_discard'.
 *	Return
 *		Nothing. Always succeeds.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -5389,6 +5421,9 @@ union bpf_attr {
	FN(map_lookup_percpu_elem),     \
	FN(skc_to_mptcp_sock),		\
	FN(dynptr_from_mem),		\
	FN(ringbuf_reserve_dynptr),	\
	FN(ringbuf_submit_dynptr),	\
	FN(ringbuf_discard_dynptr),	\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+10 −4
Original line number Diff line number Diff line
@@ -1423,12 +1423,12 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
	ptr->size |= type << DYNPTR_TYPE_SHIFT;
}

static int bpf_dynptr_check_size(u32 size)
int bpf_dynptr_check_size(u32 size)
{
	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
}

static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
		     enum bpf_dynptr_type type, u32 offset, u32 size)
{
	ptr->data = data;
@@ -1437,7 +1437,7 @@ static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
	bpf_dynptr_set_type(ptr, type);
}

static void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
{
	memset(ptr, 0, sizeof(*ptr));
}
@@ -1523,6 +1523,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_ringbuf_reserve_dynptr:
		return &bpf_ringbuf_reserve_dynptr_proto;
	case BPF_FUNC_ringbuf_submit_dynptr:
		return &bpf_ringbuf_submit_dynptr_proto;
	case BPF_FUNC_ringbuf_discard_dynptr:
		return &bpf_ringbuf_discard_dynptr_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_loop:
+78 −0
Original line number Diff line number Diff line
@@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = {
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

/*
 * Reserve *size* bytes in the ring buffer backing *map* and expose the
 * reservation through the dynptr *ptr*.
 *
 * Returns 0 on success. On failure *ptr* is always reset to a null dynptr
 * before returning:
 *   -EINVAL  if *flags* is non-zero or the ring buffer reservation fails,
 *   the error from bpf_dynptr_check_size() if *size* is rejected.
 */
BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
	   struct bpf_dynptr_kern *, ptr)
{
	struct bpf_ringbuf_map *rb_map;
	void *sample;
	int err;

	/* No flags are defined for this helper. */
	if (unlikely(flags)) {
		err = -EINVAL;
		goto out_null;
	}

	err = bpf_dynptr_check_size(size);
	if (err)
		goto out_null;

	rb_map = container_of(map, struct bpf_ringbuf_map, map);
	sample = __bpf_ringbuf_reserve(rb_map->rb, size);
	if (!sample) {
		err = -EINVAL;
		goto out_null;
	}

	bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
	return 0;

out_null:
	/* Every failure path hands back a zeroed dynptr. */
	bpf_dynptr_set_null(ptr);
	return err;
}

/*
 * Function prototype descriptor for bpf_ringbuf_reserve_dynptr().
 * Arguments, in order: the map (a constant map pointer), the size and the
 * flags (both ARG_ANYTHING), and a ringbuf-typed dynptr. The helper returns
 * an integer.
 * NOTE(review): MEM_UNINIT presumably tells the verifier the dynptr need not
 * be initialized on entry; confirm against the verifier.
 */
const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
	.func		= bpf_ringbuf_reserve_dynptr,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
};

/*
 * Submit the ring buffer sample referenced by the dynptr *ptr*, passing
 * *flags* through to the commit, then reset *ptr* to a null dynptr.
 * A dynptr with no backing data is left untouched. Always returns 0.
 */
BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
{
	void *sample = ptr->data;

	if (sample) {
		bpf_ringbuf_commit(sample, flags, false /* discard */);
		bpf_dynptr_set_null(ptr);
	}

	return 0;
}

/*
 * Function prototype descriptor for bpf_ringbuf_submit_dynptr().
 * Arguments, in order: a ringbuf-typed dynptr and the flags (ARG_ANYTHING).
 * The helper returns nothing.
 * NOTE(review): OBJ_RELEASE presumably marks this call as releasing the
 * dynptr's reference for the verifier; confirm against the verifier.
 */
const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = {
	.func		= bpf_ringbuf_submit_dynptr,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
	.arg2_type	= ARG_ANYTHING,
};

/*
 * Discard the ring buffer sample referenced by the dynptr *ptr*, passing
 * *flags* through to the commit, then reset *ptr* to a null dynptr.
 * A dynptr with no backing data is left untouched. Always returns 0.
 */
BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
{
	void *sample = ptr->data;

	if (sample) {
		bpf_ringbuf_commit(sample, flags, true /* discard */);
		bpf_dynptr_set_null(ptr);
	}

	return 0;
}

/*
 * Function prototype descriptor for bpf_ringbuf_discard_dynptr().
 * Arguments, in order: a ringbuf-typed dynptr and the flags (ARG_ANYTHING).
 * The helper returns nothing.
 * NOTE(review): OBJ_RELEASE presumably marks this call as releasing the
 * dynptr's reference for the verifier; confirm against the verifier.
 */
const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
	.func		= bpf_ringbuf_discard_dynptr,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
	.arg2_type	= ARG_ANYTHING,
};
Loading