Merge branch 'Optimize performance of update hash-map when free is zero' (d5e9aeda) · Commits · EulixOS / Software / Kernel

kernel/bpf/percpu_freelist.c

+14 −6

Original line number	Diff line number	Diff line
		@@ -31,7 +31,7 @@ static inline void pcpu_freelist_push_node(struct pcpu_freelist_head *head,
		struct pcpu_freelist_node *node)
		{
		node->next = head->first;
		head->first = node;
		WRITE_ONCE(head->first, node);
		}

		static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
		@@ -130,14 +130,17 @@ static struct pcpu_freelist_node ___pcpu_freelist_pop(struct pcpu_freelist s)
		orig_cpu = cpu = raw_smp_processor_id();
		while (1) {
		head = per_cpu_ptr(s->freelist, cpu);
		if (!READ_ONCE(head->first))
		goto next_cpu;
		raw_spin_lock(&head->lock);
		node = head->first;
		if (node) {
		head->first = node->next;
		WRITE_ONCE(head->first, node->next);
		raw_spin_unlock(&head->lock);
		return node;
		}
		raw_spin_unlock(&head->lock);
		next_cpu:
		cpu = cpumask_next(cpu, cpu_possible_mask);
		if (cpu >= nr_cpu_ids)
		cpu = 0;
		@@ -146,10 +149,12 @@ static struct pcpu_freelist_node ___pcpu_freelist_pop(struct pcpu_freelist s)
		}

		/* per cpu lists are all empty, try extralist */
		if (!READ_ONCE(s->extralist.first))
		return NULL;
		raw_spin_lock(&s->extralist.lock);
		node = s->extralist.first;
		if (node)
		s->extralist.first = node->next;
		WRITE_ONCE(s->extralist.first, node->next);
		raw_spin_unlock(&s->extralist.lock);
		return node;
		}
		@@ -164,15 +169,18 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
		orig_cpu = cpu = raw_smp_processor_id();
		while (1) {
		head = per_cpu_ptr(s->freelist, cpu);
		if (!READ_ONCE(head->first))
		goto next_cpu;
		if (raw_spin_trylock(&head->lock)) {
		node = head->first;
		if (node) {
		head->first = node->next;
		WRITE_ONCE(head->first, node->next);
		raw_spin_unlock(&head->lock);
		return node;
		}
		raw_spin_unlock(&head->lock);
		}
		next_cpu:
		cpu = cpumask_next(cpu, cpu_possible_mask);
		if (cpu >= nr_cpu_ids)
		cpu = 0;
		@@ -181,11 +189,11 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
		}

		/* cannot pop from per cpu lists, try extralist */
		if (!raw_spin_trylock(&s->extralist.lock))
		if (!READ_ONCE(s->extralist.first) \|\| !raw_spin_trylock(&s->extralist.lock))
		return NULL;
		node = s->extralist.first;
		if (node)
		s->extralist.first = node->next;
		WRITE_ONCE(s->extralist.first, node->next);
		raw_spin_unlock(&s->extralist.lock);
		return node;
		}

tools/testing/selftests/bpf/Makefile

+3 −1

Original line number	Diff line number	Diff line
		@@ -560,6 +560,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
		$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
		$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
		$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
		$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
		$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
		$(OUTPUT)/bench: LDLIBS += -lm
		$(OUTPUT)/bench: $(OUTPUT)/bench.o \
		@@ -571,7 +572,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
		$(OUTPUT)/bench_ringbufs.o \
		$(OUTPUT)/bench_bloom_filter_map.o \
		$(OUTPUT)/bench_bpf_loop.o \
		$(OUTPUT)/bench_strncmp.o
		$(OUTPUT)/bench_strncmp.o \
		$(OUTPUT)/bench_bpf_hashmap_full_update.o
		$(call msg,BINARY,,$@)
		$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@

tools/testing/selftests/bpf/bench.c

+2 −0

Original line number	Diff line number	Diff line
		@@ -396,6 +396,7 @@ extern const struct bench bench_hashmap_with_bloom;
		extern const struct bench bench_bpf_loop;
		extern const struct bench bench_strncmp_no_helper;
		extern const struct bench bench_strncmp_helper;
		extern const struct bench bench_bpf_hashmap_full_update;

		static const struct bench *benchs[] = {
		&bench_count_global,
		@@ -430,6 +431,7 @@ static const struct bench *benchs[] = {
		&bench_bpf_loop,
		&bench_strncmp_no_helper,
		&bench_strncmp_helper,
		&bench_bpf_hashmap_full_update,
		};

		static void setup_benchmark()

tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c

0 → 100644

+96 −0

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: GPL-2.0
		/* Copyright (c) 2022 Bytedance */

		#include <argp.h>
		#include "bench.h"
		#include "bpf_hashmap_full_update_bench.skel.h"
		#include "bpf_util.h"

		/* Benchmark-wide state, populated by setup() and read by the final report. */
		struct ctx {
			/* Skeleton handle for the hashmap full-update BPF object. */
			struct bpf_hashmap_full_update_bench *skel;
		};

		static struct ctx ctx;

		#define MAX_LOOP_NUM 10000

		/*
		 * Check the command-line configuration: exactly one consumer is
		 * accepted; any other count prints an error and terminates the process.
		 */
		static void validate(void)
		{
			if (env.consumer_cnt == 1)
				return;

			fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
			exit(1);
		}

		/*
		 * Producer thread entry point.
		 *
		 * Loops forever issuing getpgid() syscalls; each call is what triggers
		 * the attached BPF program. @input is unused. The function never
		 * returns in practice; the trailing return only satisfies the
		 * thread-function signature.
		 */
		static void *producer(void *input)
		{
			while (true) {
				/* trigger the bpf program */
				syscall(__NR_getpgid);
			}

			return NULL;
		}

		/*
		 * Consumer thread entry point.
		 *
		 * This benchmark has nothing to consume, so the thread exits
		 * immediately. @input is unused. Always returns NULL.
		 */
		static void *consumer(void *input)
		{
			return NULL;
		}

		/*
		 * Periodic measurement hook, intentionally empty: nothing is written
		 * to @res. The numbers this benchmark prints are read directly from
		 * ctx.skel->bss->percpu_time in hashmap_report_final().
		 */
		static void measure(struct bench_res *res)
		{
		}

		/*
		 * Prepare the benchmark: load the BPF skeleton, set the loop count,
		 * attach the program and fill the hash map up to max_entries.
		 *
		 * Any failure prints a message to stderr and exits with status 1.
		 */
		static void setup(void)
		{
			struct bpf_link *link;
			int map_fd, i, max_entries, err;

			setup_libbpf();

			ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
			if (!ctx.skel) {
				fprintf(stderr, "failed to open skeleton\n");
				exit(1);
			}

			ctx.skel->bss->nr_loops = MAX_LOOP_NUM;

			/*
			 * The link is never destroyed here.
			 * NOTE(review): presumably it is meant to stay attached for
			 * the lifetime of the process -- confirm.
			 */
			link = bpf_program__attach(ctx.skel->progs.benchmark);
			if (!link) {
				fprintf(stderr, "failed to attach program!\n");
				exit(1);
			}

			/* fill hash_map */
			map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
			max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench);
			for (i = 0; i < max_entries; i++) {
				/*
				 * A map that is not completely full would make the
				 * benchmark measure something else, so do not ignore
				 * a failed insert.
				 */
				err = bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
				if (err) {
					fprintf(stderr, "failed to fill hash map at key %d: %d\n",
						i, err);
					exit(1);
				}
			}
		}

		/*
		 * Final report: for every possible CPU that recorded a non-zero
		 * percpu_time, print nr_loops * 10^9 / percpu_time as "events per sec".
		 *
		 * @res and @res_cnt are unused; the data comes from the BPF object's
		 * .bss section.
		 *
		 * NOTE(review): the 10^9 factor suggests percpu_time is in nanoseconds,
		 * and the length of percpu_time[] is not visible here -- confirm it
		 * covers bpf_num_possible_cpus() entries.
		 */
		void hashmap_report_final(struct bench_res res[], int res_cnt)
		{
			unsigned int nr_cpus = bpf_num_possible_cpus();
			unsigned int i;	/* unsigned to match nr_cpus in the comparison */

			for (i = 0; i < nr_cpus; i++) {
				u64 time = ctx.skel->bss->percpu_time[i];

				/* CPUs that never ran the program have no sample. */
				if (!time)
					continue;

				/*
				 * Dividing by a u64 yields an unsigned value, so print
				 * it with %llu through an explicit cast.
				 */
				printf("%u:hash_map_full_perf %llu events per sec\n", i,
				       (unsigned long long)(ctx.skel->bss->nr_loops *
							    1000000000ll / time));
			}
		}

		/*
		 * Benchmark descriptor wiring this file's callbacks together.
		 *
		 * NOTE(review): "ful" in the name looks like a typo for "full", but
		 * run_bench_bpf_hashmap_full_update.sh invokes the benchmark by this
		 * exact string, so the two must be changed together.
		 */
		const struct bench bench_bpf_hashmap_full_update = {
		.name = "bpf-hashmap-ful-update",
		.validate = validate,
		.setup = setup,
		.producer_thread = producer,
		.consumer_thread = consumer,
		/* measure() is a no-op and there is no progress reporting ... */
		.measure = measure,
		.report_progress = NULL,
		/* ... all output comes from the final report. */
		.report_final = hashmap_report_final,
		};

tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh

0 → 100755

+11 −0

Original line number	Diff line number	Diff line
		#!/bin/bash
		# SPDX-License-Identifier: GPL-2.0
		#
		# Run the bpf-hashmap-ful-update benchmark with one producer thread per
		# CPU listed in /proc/cpuinfo, minus one, and print its summary.

		source ./benchs/run_common.sh

		set -eufo pipefail

		# Count "processor" lines; "|| true" keeps a zero count from tripping set -e.
		nr_cpus=$(grep -c "processor" /proc/cpuinfo || true)

		# Shell arithmetic instead of expr: expr exits with status 1 when the
		# result is 0, which would abort the script on a single-CPU machine.
		nr_threads=$((nr_cpus - 1))
		if [ "$nr_threads" -lt 1 ]; then
			# Always ask for at least one producer.
			nr_threads=1
		fi

		# $RUN_BENCH is deliberately unquoted so it can expand to a command
		# plus its arguments.
		summary=$($RUN_BENCH -p "$nr_threads" bpf-hashmap-ful-update)

		# Never use program output as the printf format string.
		printf "%s\n" "$summary"