Commit d964f09a authored by Andrii Nakryiko
Browse files

Merge branch 'New benchmark for hashmap lookups'

Anton Protopopov says:

====================

Add a new benchmark for hashmap lookups and fix several typos.

In commit 3 I've patched the bench utility so that now command line options
can be reused by different benchmarks.

The benchmark itself is added in the last commit 7. I was using this benchmark
to test map lookup performance when using a different hash function [1]. When
run with --quiet, the results can be easily plotted [2]. The results provided
by the benchmark look reasonable and match the results of my other benchmarks
(which require patching the kernel to get actual statistics on map lookups).

Links:
  [1] https://fosdem.org/2023/schedule/event/bpf_hashing/
  [2] https://github.com/aspsk/bpf-bench/tree/master/hashmap-bench



Changes,
v1->v2:
- percpu_times_index[] is of wrong size (Martin)
- use base 0 for strtol (Andrii)
- just use -q without argument (Andrii)
- use fewer hacks when parsing arguments (Andrii)
====================

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents 3538a0fb f371f2dc
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -638,6 +638,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -652,7 +653,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
		 $(OUTPUT)/bench_strncmp.o \
		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
		 $(OUTPUT)/bench_local_storage.o \
		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
		 $(OUTPUT)/bench_bpf_hashmap_lookup.o \
		 #
	$(call msg,BINARY,,$@)
	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@

+47 −12
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ struct env env = {
	.warmup_sec = 1,
	.duration_sec = 5,
	.affinity = false,
	.quiet = false,
	.consumer_cnt = 1,
	.producer_cnt = 1,
};
@@ -262,6 +263,7 @@ static const struct argp_option opts[] = {
	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
	{ "quiet", 'q', NULL, 0, "Be more quiet"},
	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
	  "Set of CPUs for producer threads; implies --affinity"},
	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -275,6 +277,7 @@ extern struct argp bench_bpf_loop_argp;
extern struct argp bench_local_storage_argp;
extern struct argp bench_local_storage_rcu_tasks_trace_argp;
extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;

static const struct argp_child bench_parsers[] = {
	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -284,13 +287,15 @@ static const struct argp_child bench_parsers[] = {
	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
/* Make pos_args global, so that we can run argp_parse twice, if necessary */
static int pos_args;

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case 'v':
		env.verbose = true;
@@ -329,6 +334,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
	case 'a':
		env.affinity = true;
		break;
	case 'q':
		env.quiet = true;
		break;
	case ARG_PROD_AFFINITY_SET:
		env.affinity = true;
		if (parse_num_list(arg, &env.prod_cpus.cpus,
@@ -359,7 +367,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
	return 0;
}

static void parse_cmdline_args(int argc, char **argv)
static void parse_cmdline_args_init(int argc, char **argv)
{
	static const struct argp argp = {
		.options = opts,
@@ -369,8 +377,24 @@ static void parse_cmdline_args(int argc, char **argv)
	};
	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
		exit(1);
	if (!env.list && !env.bench_name) {
		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
}

/*
 * Second command-line parsing pass, run once the benchmark has been selected.
 *
 * If the selected benchmark provides its own argp parser, argc/argv are parsed
 * again with that parser as the only child, under a header set to the
 * benchmark's name. Benchmarks without an argp parser skip this pass.
 * Exits with status 1 if argp_parse() reports an error.
 */
static void parse_cmdline_args_final(int argc, char **argv)
{
	/* One slot for the selected benchmark, one zeroed terminator entry */
	struct argp_child selected[2] = {};
	const struct argp argp = {
		.options = opts,
		.parser = parse_arg,
		.doc = argp_program_doc,
		.children = selected,
	};

	/* Nothing benchmark-specific to parse */
	if (!bench->argp)
		return;

	selected[0].argp = bench->argp;
	selected[0].header = bench->name;

	/* pos_args is file-scope state kept across argp_parse() runs; restart it */
	pos_args = 0;
	if (argp_parse(&argp, argc, argv, 0, NULL, NULL) != 0)
		exit(1);
}
@@ -490,6 +514,7 @@ extern const struct bench bench_local_storage_cache_seq_get;
extern const struct bench bench_local_storage_cache_interleaved_get;
extern const struct bench bench_local_storage_cache_hashmap_control;
extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;

static const struct bench *benchs[] = {
	&bench_count_global,
@@ -529,17 +554,17 @@ static const struct bench *benchs[] = {
	&bench_local_storage_cache_interleaved_get,
	&bench_local_storage_cache_hashmap_control,
	&bench_local_storage_tasks_trace,
	&bench_bpf_hashmap_lookup,
};

static void setup_benchmark()
static void find_benchmark(void)
{
	int i, err;
	int i;

	if (!env.bench_name) {
		fprintf(stderr, "benchmark name is not specified\n");
		exit(1);
	}

	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
			bench = benchs[i];
@@ -550,7 +575,13 @@ static void setup_benchmark()
		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
		exit(1);
	}
}

static void setup_benchmark(void)
{
	int i, err;

	if (!env.quiet)
		printf("Setting up benchmark '%s'...\n", bench->name);

	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
@@ -597,6 +628,7 @@ static void setup_benchmark()
					    next_cpu(&env.prod_cpus));
	}

	if (!env.quiet)
		printf("Benchmark '%s' started.\n", bench->name);
}

@@ -621,7 +653,7 @@ static void collect_measurements(long delta_ns) {

int main(int argc, char **argv)
{
	parse_cmdline_args(argc, argv);
	parse_cmdline_args_init(argc, argv);

	if (env.list) {
		int i;
@@ -633,6 +665,9 @@ int main(int argc, char **argv)
		return 0;
	}

	find_benchmark();
	parse_cmdline_args_final(argc, argv);

	setup_benchmark();

	setup_timer();
+2 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ struct env {
	bool verbose;
	bool list;
	bool affinity;
	bool quiet;
	int consumer_cnt;
	int producer_cnt;
	struct cpu_set prod_cpus;
@@ -47,6 +48,7 @@ struct bench_res {

struct bench {
	const char *name;
	const struct argp *argp;
	void (*validate)(void);
	void (*setup)(void);
	void *(*producer_thread)(void *ctx);
+5 −0
Original line number Diff line number Diff line
@@ -428,6 +428,7 @@ static void *consumer(void *input)

const struct bench bench_bloom_lookup = {
	.name = "bloom-lookup",
	.argp = &bench_bloom_map_argp,
	.validate = validate,
	.setup = bloom_lookup_setup,
	.producer_thread = producer,
@@ -439,6 +440,7 @@ const struct bench bench_bloom_lookup = {

const struct bench bench_bloom_update = {
	.name = "bloom-update",
	.argp = &bench_bloom_map_argp,
	.validate = validate,
	.setup = bloom_update_setup,
	.producer_thread = producer,
@@ -450,6 +452,7 @@ const struct bench bench_bloom_update = {

const struct bench bench_bloom_false_positive = {
	.name = "bloom-false-positive",
	.argp = &bench_bloom_map_argp,
	.validate = validate,
	.setup = false_positive_setup,
	.producer_thread = producer,
@@ -461,6 +464,7 @@ const struct bench bench_bloom_false_positive = {

const struct bench bench_hashmap_without_bloom = {
	.name = "hashmap-without-bloom",
	.argp = &bench_bloom_map_argp,
	.validate = validate,
	.setup = hashmap_no_bloom_setup,
	.producer_thread = producer,
@@ -472,6 +476,7 @@ const struct bench bench_hashmap_without_bloom = {

const struct bench bench_hashmap_with_bloom = {
	.name = "hashmap-with-bloom",
	.argp = &bench_bloom_map_argp,
	.validate = validate,
	.setup = hashmap_with_bloom_setup,
	.producer_thread = producer,
+2 −3
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Bytedance */

#include <argp.h>
#include "bench.h"
#include "bpf_hashmap_full_update_bench.skel.h"
#include "bpf_util.h"
@@ -68,7 +67,7 @@ static void setup(void)
		bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
}

void hashmap_report_final(struct bench_res res[], int res_cnt)
static void hashmap_report_final(struct bench_res res[], int res_cnt)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	int i;
@@ -85,7 +84,7 @@ void hashmap_report_final(struct bench_res res[], int res_cnt)
}

const struct bench bench_bpf_hashmap_full_update = {
	.name = "bpf-hashmap-ful-update",
	.name = "bpf-hashmap-full-update",
	.validate = validate,
	.setup = setup,
	.producer_thread = producer,
Loading