Commit b66ccae0 authored by David Vernet, committed by Andrii Nakryiko
Browse files

bpf: Add libbpf logic for user-space ring buffer



Now that all of the logic is in place in the kernel to support user-space
produced ring buffers, we can add the user-space logic to libbpf. This
patch therefore adds the following public symbols to libbpf:

struct user_ring_buffer *
user_ring_buffer__new(int map_fd,
		      const struct user_ring_buffer_opts *opts);
void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
                                         __u32 size, int timeout_ms);
void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
void user_ring_buffer__free(struct user_ring_buffer *rb);

A user-space producer must first create a struct user_ring_buffer * object
with user_ring_buffer__new(), and can then reserve samples in the
ring buffer using one of the following two symbols:

void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
                                         __u32 size, int timeout_ms);

With user_ring_buffer__reserve(), a pointer to a 'size' region of the ring
buffer will be returned if sufficient space is available in the buffer.
user_ring_buffer__reserve_blocking() provides similar semantics, but will
block for up to 'timeout_ms' in epoll_wait if there is insufficient space
in the buffer. This function has the guarantee from the kernel that it will
receive at least one event-notification per invocation to
bpf_ringbuf_drain(), provided that at least one sample is drained, and the
BPF program did not pass the BPF_RB_NO_WAKEUP flag to bpf_ringbuf_drain().

Once a sample is reserved, it must either be committed to the ring buffer
with user_ring_buffer__submit(), or discarded with
user_ring_buffer__discard().

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-4-void@manifault.com
parent 20571567
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -2373,6 +2373,12 @@ static size_t adjust_ringbuf_sz(size_t sz)
	return sz;
}

/* Tell whether @map's type is one of the ring buffer map types:
 * BPF_MAP_TYPE_RINGBUF or BPF_MAP_TYPE_USER_RINGBUF.
 *
 * Returns true for either of those two types, false for anything else.
 */
static bool map_is_ringbuf(const struct bpf_map *map)
{
	switch (map->def.type) {
	case BPF_MAP_TYPE_RINGBUF:
	case BPF_MAP_TYPE_USER_RINGBUF:
		return true;
	default:
		return false;
	}
}

static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
	map->def.type = def->map_type;
@@ -2387,7 +2393,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
	map->btf_value_type_id = def->value_type_id;

	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
	if (map_is_ringbuf(map))
		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);

	if (def->parts & MAP_DEF_MAP_TYPE)
@@ -4370,7 +4376,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
	map->def.max_entries = max_entries;

	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
	if (map_is_ringbuf(map))
		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);

	return 0;
+107 −0
Original line number Diff line number Diff line
@@ -1011,6 +1011,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,

/* Ring buffer APIs */
struct ring_buffer;
struct user_ring_buffer;

typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);

@@ -1030,6 +1031,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);

/* Options passed to user_ring_buffer__new() to control how the user ring
 * buffer is created. The only member at present is the struct size itself.
 */
struct user_ring_buffer_opts {
	size_t sz; /* size of this struct, for forward/backward compatibility */
};

#define user_ring_buffer_opts__last_field sz

/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
 * buffer.
 *
 * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
 * @param opts Options for how the ring buffer should be created.
 * @return A user ring buffer on success; NULL and errno being set on a
 * failure.
 */
LIBBPF_API struct user_ring_buffer *
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);

/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
 * user ring buffer.
 * @param rb A pointer to a user ring buffer.
 * @param size The size of the sample, in bytes.
 * @return A pointer to an 8-byte aligned reserved region of the user ring
 * buffer; NULL, and errno being set if a sample could not be reserved.
 *
 * This function is *not* thread safe, and callers must synchronize accessing
 * this function if there are multiple producers.  If a size is requested that
 * is larger than the size of the entire ring buffer, errno will be set to
 * E2BIG and NULL is returned. If the ring buffer could accommodate the size,
 * but currently does not have enough space, errno is set to ENOSPC and NULL is
 * returned.
 *
 * After initializing the sample, callers must invoke
 * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
 * the sample must be freed with **user_ring_buffer__discard()**.
 */
LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);

/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
 * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
 * available.
 * @param rb The user ring buffer.
 * @param size The size of the sample, in bytes.
 * @param timeout_ms The amount of time, in milliseconds, for which the caller
 * should block when waiting for a sample. -1 causes the caller to block
 * indefinitely.
 * @return A pointer to an 8-byte aligned reserved region of the user ring
 * buffer; NULL, and errno being set if a sample could not be reserved.
 *
 * This function is *not* thread safe, and callers must synchronize
 * accessing this function if there are multiple producers.
 *
 * If **timeout_ms** is -1, the function will block indefinitely until a sample
 * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
 * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
 * will occur and the function will return immediately after attempting to
 * reserve a sample.
 *
 * If **size** is larger than the size of the entire ring buffer, errno is set
 * to E2BIG and NULL is returned. If the ring buffer could accommodate
 * **size**, but currently does not have enough space, the caller will block
 * until at most **timeout_ms** has elapsed. If insufficient space is available
 * at that time, errno is set to ENOSPC, and NULL is returned.
 *
 * The kernel guarantees that it will wake up this thread to check if
 * sufficient space is available in the ring buffer at least once per
 * invocation of the **bpf_ringbuf_drain()** helper function, provided that at
 * least one sample is consumed, and the BPF program did not invoke the
 * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
 * kernel does not guarantee this. If the helper function is invoked with
 * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
 * consumed.
 *
 * When a sample of size **size** is found within **timeout_ms**, a pointer to
 * the sample is returned. After initializing the sample, callers must invoke
 * **user_ring_buffer__submit()** to post the sample to the ring buffer.
 * Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
 */
LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
						    __u32 size,
						    int timeout_ms);

/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
 * into the ring buffer.
 * @param rb The user ring buffer.
 * @param sample A reserved sample.
 *
 * It is not necessary to synchronize amongst multiple producers when invoking
 * this function.
 */
LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);

/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
 * @param rb The user ring buffer.
 * @param sample A reserved sample.
 *
 * It is not necessary to synchronize amongst multiple producers when invoking
 * this function.
 */
LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);

/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
 * created with **user_ring_buffer__new()**.
 * @param rb The user ring buffer being freed.
 */
LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);

/* Perf buffer APIs */
struct perf_buffer;

+10 −0
Original line number Diff line number Diff line
@@ -368,3 +368,13 @@ LIBBPF_1.0.0 {
		libbpf_bpf_prog_type_str;
		perf_buffer__buffer;
};

LIBBPF_1.1.0 {
	global:
		user_ring_buffer__discard;
		user_ring_buffer__free;
		user_ring_buffer__new;
		user_ring_buffer__reserve;
		user_ring_buffer__reserve_blocking;
		user_ring_buffer__submit;
} LIBBPF_1.0.0;
+1 −0
Original line number Diff line number Diff line
@@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type)
			return btf_fd;
		break;
	case BPF_MAP_TYPE_RINGBUF:
	case BPF_MAP_TYPE_USER_RINGBUF:
		key_size = 0;
		value_size = 0;
		max_entries = 4096;
+1 −1
Original line number Diff line number Diff line
@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H

#define LIBBPF_MAJOR_VERSION 1
#define LIBBPF_MINOR_VERSION 0
#define LIBBPF_MINOR_VERSION 1

#endif /* __LIBBPF_VERSION_H */
Loading