Commit 9458964a authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'fix-the-unmatched-unit_size-of-bpf_mem_cache'

Hou Tao says:

====================
Fix the unmatched unit_size of bpf_mem_cache

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the reported warning [0] when the unit_size of
bpf_mem_cache is mismatched with the object size of the underlying slab cache.

Patch #1 fixes the warning by adjusting size_index according to the
value of KMALLOC_MIN_SIZE, so bpf_mem_cache with unit_size which is
smaller than KMALLOC_MIN_SIZE or is not aligned with KMALLOC_MIN_SIZE
will be redirected to bpf_mem_cache with bigger unit_size. Patch #2
doesn't do prefill for these redirected bpf_mem_cache to save memory.
Patch #3 adds a further error check in bpf_mem_alloc_init() to ensure that
the unit_size and object_size always match, and to prevent potential
issues due to a mismatch.

Please see individual patches for more details. And comments are always
welcome.

[0]: https://lore.kernel.org/bpf/87jztjmmy4.fsf@all.your.base.are.belong.to.us
====================

Link: https://lore.kernel.org/r/20230908133923.2675053-1-houtao@huaweicloud.com


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 7182e564 f0a42ab5
Loading
Loading
Loading
Loading
+83 −4
Original line number Diff line number Diff line
@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
 * Typical case will be between 11K and 116K closer to 11K.
 * bpf progs can and should share bpf_mem_cache when possible.
 */

static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
static void init_refill_work(struct bpf_mem_cache *c)
{
	init_irq_work(&c->refill_work, bpf_mem_refill);
	if (c->unit_size <= 256) {
@@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
		c->high_watermark = max(96 * 256 / c->unit_size, 3);
	}
	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
}

static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
{
	/* To avoid consuming memory assume that 1st run of bpf
	 * prog won't be doing more than 4 map_update_elem from
	 * irq disabled region
@@ -484,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
}

/* Sanity-check that the slab object size backing this cache matches the
 * cache's configured unit_size.
 *
 * @c:   per-cpu bpf_mem_cache to validate
 * @idx: index of the cache within bpf_mem_caches, used only in the warning
 *
 * Peeks at the first object on the free list (prefilled earlier) and
 * compares ksize() of that object against c->unit_size. Returns 0 when the
 * free list is empty or the sizes match; otherwise emits a one-time warning
 * and returns -EINVAL.
 */
static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
{
	struct llist_node *first;
	unsigned int obj_size;

	/* Nothing was prefilled, so there is nothing to validate. */
	first = c->free_llist.first;
	if (!first)
		return 0;

	obj_size = ksize(first);
	if (obj_size != c->unit_size) {
		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
			  idx, obj_size, c->unit_size);
		return -EINVAL;
	}
	return 0;
}

/* When size != 0 bpf_mem_cache for each cpu.
 * This is typical bpf hash map use case when all elements have equal size.
 *
@@ -494,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
	int cpu, i, err, unit_size, percpu_size = 0;
	struct bpf_mem_caches *cc, __percpu *pcc;
	struct bpf_mem_cache *c, __percpu *pc;
	struct obj_cgroup *objcg = NULL;
	int cpu, i, unit_size, percpu_size = 0;

	if (size) {
		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +541,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
			c->objcg = objcg;
			c->percpu_size = percpu_size;
			c->tgt = c;
			init_refill_work(c);
			prefill_mem_cache(c, cpu);
		}
		ma->cache = pc;
@@ -534,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
	if (!pcc)
		return -ENOMEM;
	err = 0;
#ifdef CONFIG_MEMCG_KMEM
	objcg = get_obj_cgroup_from_current();
#endif
@@ -544,11 +566,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
			c->unit_size = sizes[i];
			c->objcg = objcg;
			c->tgt = c;

			init_refill_work(c);
			/* Another bpf_mem_cache will be used when allocating
			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
			 * for the bpf_mem_cache because these free objects will
			 * never be used.
			 */
			if (i != bpf_mem_cache_idx(c->unit_size))
				continue;
			prefill_mem_cache(c, cpu);
			err = check_obj_size(c, i);
			if (err)
				goto out;
		}
	}

out:
	ma->caches = pcc;
	return 0;
	/* refill_work is either zeroed or initialized, so it is safe to
	 * call irq_work_sync().
	 */
	if (err)
		bpf_mem_alloc_destroy(ma);
	return err;
}

static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +957,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)

	return !ret ? NULL : ret + LLIST_NODE_SZ;
}

/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
/* Rewrite size_index[] so that any allocation size smaller than
 * KMALLOC_MIN_SIZE, or whose dedicated kmalloc cache is disabled by a
 * large minimal alignment, is redirected to the next available bigger
 * cache. This keeps bpf_mem_cache's unit_size consistent with the object
 * size of the slab cache that actually serves the allocation.
 */
static __init int bpf_mem_cache_adjust_size(void)
{
	unsigned int size, index;

	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
	 * up-to 256-bytes.
	 */
	size = KMALLOC_MIN_SIZE;
	if (size <= 192)
		index = size_index[(size - 1) / 8];
	else
		index = fls(size - 1) - 1;
	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
		size_index[(size - 1) / 8] = index;

	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
	 * use 128-bytes cache instead.
	 */
	if (KMALLOC_MIN_SIZE >= 64) {
		index = size_index[(128 - 1) / 8];
		for (size = 64 + 8; size <= 96; size += 8)
			size_index[(size - 1) / 8] = index;
	}

	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
	 * use 256-bytes cache instead.
	 */
	if (KMALLOC_MIN_SIZE >= 128) {
		index = fls(256 - 1) - 1;
		for (size = 128 + 8; size <= 192; size += 8)
			size_index[(size - 1) / 8] = index;
	}

	return 0;
}
/* Run once at boot, before any bpf_mem_alloc_init() user can be set up. */
subsys_initcall(bpf_mem_cache_adjust_size);
+50 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <sched.h>
#include <pthread.h>
#include <stdbool.h>
#include <bpf/btf.h>
#include <test_progs.h>

#include "test_bpf_ma.skel.h"

/* Open the test_bpf_ma skeleton, resolve the BTF id of every
 * bin_data_<size> struct declared by the bpf prog into its read-only
 * data_btf_ids table, then load, attach and trigger the prog via
 * usleep(), finally checking the error code the prog reports back
 * through its global 'err'.
 */
void test_test_bpf_ma(void)
{
	struct test_bpf_ma *skel;
	struct btf *btf;
	int idx, ret;

	skel = test_bpf_ma__open();
	if (!ASSERT_OK_PTR(skel, "open"))
		return;

	btf = bpf_object__btf(skel->obj);
	if (!ASSERT_OK_PTR(btf, "btf"))
		goto out;

	/* Patch the rodata id table before load; rodata is still
	 * writable at this point.
	 */
	for (idx = 0; idx < ARRAY_SIZE(skel->rodata->data_sizes); idx++) {
		char type_name[32];
		int btf_id;

		snprintf(type_name, sizeof(type_name), "bin_data_%u",
			 skel->rodata->data_sizes[idx]);
		btf_id = btf__find_by_name_kind(btf, type_name, BTF_KIND_STRUCT);
		if (!ASSERT_GT(btf_id, 0, "bin_data"))
			goto out;
		skel->rodata->data_btf_ids[idx] = btf_id;
	}

	ret = test_bpf_ma__load(skel);
	if (!ASSERT_OK(ret, "load"))
		goto out;

	ret = test_bpf_ma__attach(skel);
	if (!ASSERT_OK(ret, "attach"))
		goto out;

	/* The prog only reacts to this pid; usleep() enters the traced
	 * nanosleep syscall and fires the fentry prog.
	 */
	skel->bss->pid = getpid();
	usleep(1);
	ASSERT_OK(skel->bss->err, "test error");
out:
	test_bpf_ma__destroy(skel);
}
+123 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>

#include "bpf_experimental.h"
#include "bpf_misc.h"

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

struct generic_map_value {
	void *data;
};

char _license[] SEC("license") = "GPL";

const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096};
const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {};

int err = 0;
int pid = 0;

/* For a given object size, declare a bin_data_<size> struct whose total
 * size is _size (accounting for the kptr header pointer), a map value
 * carrying a __kptr field of that type, and a 128-entry array map with
 * that value type. The not_used member only forces BTF emission for
 * bin_data_<size> so userspace can look up its BTF id by name.
 */
#define DEFINE_ARRAY_WITH_KPTR(_size) \
	struct bin_data_##_size { \
		char data[_size - sizeof(void *)]; \
	}; \
	struct map_value_##_size { \
		struct bin_data_##_size __kptr * data; \
		/* To emit BTF info for bin_data_xx */ \
		struct bin_data_##_size not_used; \
	}; \
	struct { \
		__uint(type, BPF_MAP_TYPE_ARRAY); \
		__type(key, int); \
		__type(value, struct map_value_##_size); \
		__uint(max_entries, 128); \
	} array_##_size SEC(".maps");

/* Allocate 'batch' objects of the BTF type data_btf_ids[idx] and stash
 * each one into a map slot via bpf_kptr_xchg(), then walk the same slots
 * again, take the objects back and free them. The two passes exercise
 * bpf_mem_alloc refilling (batched alloc) and freeing (batched free).
 *
 * On failure the global 'err' is set to a distinct non-zero code
 * (1: lookup failed on alloc pass, 2: allocation failed, 3: slot was
 * unexpectedly occupied, 4: lookup failed on free pass, 5: slot was
 * unexpectedly empty) so userspace can tell which step broke.
 */
static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
					     unsigned int idx)
{
	struct generic_map_value *value;
	unsigned int i, key;
	void *old, *new;

	for (i = 0; i < batch; i++) {
		key = i;
		value = bpf_map_lookup_elem(map, &key);
		if (!value) {
			err = 1;
			return;
		}
		new = bpf_obj_new_impl(data_btf_ids[idx], NULL);
		if (!new) {
			err = 2;
			return;
		}
		old = bpf_kptr_xchg(&value->data, new);
		if (old) {
			/* Slot should have been empty; drop the stale object
			 * so it is not leaked before reporting the error.
			 */
			bpf_obj_drop(old);
			err = 3;
			return;
		}
	}
	for (i = 0; i < batch; i++) {
		key = i;
		value = bpf_map_lookup_elem(map, &key);
		if (!value) {
			err = 4;
			return;
		}
		old = bpf_kptr_xchg(&value->data, NULL);
		if (!old) {
			err = 5;
			return;
		}
		bpf_obj_drop(old);
	}
}

/* Dispatch batch_alloc_free() to the array map declared for 'size';
 * 'idx' must be the position of 'size' in data_sizes/data_btf_ids.
 */
#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
	batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)

DEFINE_ARRAY_WITH_KPTR(8);
DEFINE_ARRAY_WITH_KPTR(16);
DEFINE_ARRAY_WITH_KPTR(32);
DEFINE_ARRAY_WITH_KPTR(64);
DEFINE_ARRAY_WITH_KPTR(96);
DEFINE_ARRAY_WITH_KPTR(128);
DEFINE_ARRAY_WITH_KPTR(192);
DEFINE_ARRAY_WITH_KPTR(256);
DEFINE_ARRAY_WITH_KPTR(512);
DEFINE_ARRAY_WITH_KPTR(1024);
DEFINE_ARRAY_WITH_KPTR(2048);
DEFINE_ARRAY_WITH_KPTR(4096);

SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int test_bpf_mem_alloc_free(void *ctx)
{
	/* Only run for the userspace test process that set 'pid'. */
	if ((u32)bpf_get_current_pid_tgid() != pid)
		return 0;

	/* Alloc 128 8-bytes objects in batch to trigger refilling,
	 * then free 128 8-bytes objects in batch to trigger freeing.
	 */
	CALL_BATCH_ALLOC_FREE(8, 128, 0);
	CALL_BATCH_ALLOC_FREE(16, 128, 1);
	CALL_BATCH_ALLOC_FREE(32, 128, 2);
	CALL_BATCH_ALLOC_FREE(64, 128, 3);
	CALL_BATCH_ALLOC_FREE(96, 128, 4);
	CALL_BATCH_ALLOC_FREE(128, 128, 5);
	CALL_BATCH_ALLOC_FREE(192, 128, 6);
	CALL_BATCH_ALLOC_FREE(256, 128, 7);
	/* Larger unit sizes use smaller batches; batch * size stays
	 * roughly constant to bound memory usage.
	 */
	CALL_BATCH_ALLOC_FREE(512, 64, 8);
	CALL_BATCH_ALLOC_FREE(1024, 32, 9);
	CALL_BATCH_ALLOC_FREE(2048, 16, 10);
	CALL_BATCH_ALLOC_FREE(4096, 8, 11);

	return 0;
}