Commit f6e659b7 authored by Joanne Koong's avatar Joanne Koong Committed by Alexei Starovoitov
Browse files

selftests/bpf: Measure bpf_loop verifier performance



This patch tests bpf_loop in pyperf and strobemeta, and measures the
verifier performance of replacing the traditional for loop
with bpf_loop.

The results are as follows:

~strobemeta~

Baseline
    verification time 6808200 usec
    stack depth 496
    processed 554252 insns (limit 1000000) max_states_per_insn 16
    total_states 15878 peak_states 13489  mark_read 3110
    #192 verif_scale_strobemeta:OK (unrolled loop)

Using bpf_loop
    verification time 31589 usec
    stack depth 96+400
    processed 1513 insns (limit 1000000) max_states_per_insn 2
    total_states 106 peak_states 106 mark_read 60
    #193 verif_scale_strobemeta_bpf_loop:OK

~pyperf600~

Baseline
    verification time 29702486 usec
    stack depth 368
    processed 626838 insns (limit 1000000) max_states_per_insn 7
    total_states 30368 peak_states 30279 mark_read 748
    #182 verif_scale_pyperf600:OK (unrolled loop)

Using bpf_loop
    verification time 148488 usec
    stack depth 320+40
    processed 10518 insns (limit 1000000) max_states_per_insn 10
    total_states 705 peak_states 517 mark_read 38
    #183 verif_scale_pyperf600_bpf_loop:OK

Using the bpf_loop helper led to approximately a 99% decrease
in the verification time and in the number of instructions.

Signed-off-by: default avatarJoanne Koong <joannekoong@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
Acked-by: default avatarAndrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com
parent 4e5070b6
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600()
	scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_pyperf600_bpf_loop(void)
{
	/* use the bpf_loop helper*/
	scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_pyperf600_nounroll()
{
	/* no unroll at all.
@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta()
	scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_strobemeta_bpf_loop(void)
{
	/* use the bpf_loop helper*/
	scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}

void test_verif_scale_strobemeta_nounroll1()
{
	/* no unroll, tiny loops */
+70 −1
Original line number Diff line number Diff line
@@ -159,6 +159,59 @@ struct {
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");

#ifdef USE_BPF_LOOP
struct process_frame_ctx {
	int cur_cpu;
	int32_t *symbol_counter;
	void *frame_ptr;
	FrameData *frame;
	PidData *pidData;
	Symbol *sym;
	Event *event;
	bool done;
};

#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))

static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		frame_ptr = frame->f_back;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */

#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
	struct process_frame_ctx ctx = {
		.cur_cpu = cur_cpu,
		.symbol_counter = symbol_counter,
		.frame_ptr = frame_ptr,
		.frame = &frame,
		.pidData = pidData,
		.sym = &sym,
		.event = event,
	};

	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
	if (ctx.done)
		return 0;
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
#endif
#endif /* NO_UNROLL */
		/* Unwind python stack */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
+6 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#define STACK_MAX_LEN 600
#define USE_BPF_LOOP
#include "pyperf.h"
+72 −3
Original line number Diff line number Diff line
@@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
	return payload;
}

#ifdef USE_BPF_LOOP
enum read_type {
	READ_INT_VAR,
	READ_MAP_VAR,
	READ_STR_VAR,
};

struct read_var_ctx {
	struct strobemeta_payload *data;
	void *tls_base;
	struct strobemeta_cfg *cfg;
	void *payload;
	/* value gets mutated */
	struct strobe_value_generic *value;
	enum read_type type;
};

static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
{
	switch (ctx->type) {
	case READ_INT_VAR:
		if (index >= STROBE_MAX_INTS)
			return 1;
		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
		break;
	case READ_MAP_VAR:
		if (index >= STROBE_MAX_MAPS)
			return 1;
		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
					    ctx->value, ctx->data, ctx->payload);
		break;
	case READ_STR_VAR:
		if (index >= STROBE_MAX_STRS)
			return 1;
		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
					     ctx->value, ctx->data, ctx->payload);
		break;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */

/*
 * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
 * pointer to *right after* payload ends
@@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task,
	 */
	tls_base = (void *)task;

#ifdef USE_BPF_LOOP
	struct read_var_ctx ctx = {
		.cfg = cfg,
		.tls_base = tls_base,
		.value = &value,
		.data = data,
		.payload = payload,
	};
	int err;

	ctx.type = READ_INT_VAR;
	err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
	if (err != STROBE_MAX_INTS)
		return NULL;

	ctx.type = READ_STR_VAR;
	err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
	if (err != STROBE_MAX_STRS)
		return NULL;

	ctx.type = READ_MAP_VAR;
	err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
	if (err != STROBE_MAX_MAPS)
		return NULL;
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
		read_int_var(cfg, i, tls_base, &value, data);
	}
@@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
	}
@@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
#endif
#endif /* NO_UNROLL */
	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
	}
#endif /* USE_BPF_LOOP */

	/*
	 * return pointer right after end of payload, so it's possible to
	 * calculate exact amount of useful data that needs to be sent
+9 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2021 Facebook */

#define STROBE_MAX_INTS 2
#define STROBE_MAX_STRS 25
#define STROBE_MAX_MAPS 100
#define STROBE_MAX_MAP_ENTRIES 20
#define USE_BPF_LOOP
#include "strobemeta.h"