Commit e65a5c6e authored by Martin KaFai Lau, committed by Alexei Starovoitov
Browse files

bpf: Add a few bpf mem allocator functions



This patch adds a few bpf mem allocator functions which will
be used in the bpf_local_storage in a later patch.

bpf_mem_cache_alloc_flags(..., gfp_t flags) is added. When
flags == GFP_KERNEL, it will fall back to __alloc(..., GFP_KERNEL).
bpf_local_storage knows its running context is sleepable (GFP_KERNEL),
so this provides a better guarantee on memory allocation.

bpf_local_storage has some uncommon cases in which its selem
cannot be reused immediately. It handles its own
rcu_head, goes through a rcu_trace gp, and then frees the selem.
bpf_mem_cache_raw_free() is added for this direct-free purpose
without leaking the LLIST_NODE_SZ internal knowledge.
During free time, the 'struct bpf_mem_alloc *ma' is no longer
available. However, the caller should know if it is
percpu memory or not and it can call different raw_free functions.
bpf_local_storage does not support percpu value, so only
the non-percpu 'bpf_mem_cache_raw_free()' is added in
this patch.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20230322215246.1675516-2-martin.lau@linux.dev


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent e9936076
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
/* Frees 'ptr' directly with kfree() instead of putting it back on a
 * free_llist. Needs no bpf_mem_alloc pointer. The caller must already have
 * gone through a rcu_tasks_trace gp for 'ptr'. A NULL 'ptr' is ignored.
 */
void bpf_mem_cache_raw_free(void *ptr);
/* Allocates from the current CPU's cache of 'ma'. When flags == GFP_KERNEL
 * and the free_llist is empty, it falls back to kmalloc.
 * Returns NULL on failure.
 */
void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);

#endif /* _BPF_MEM_ALLOC_H */
+50 −9
Original line number Diff line number Diff line
@@ -121,15 +121,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
	return entry;
}

static void *__alloc(struct bpf_mem_cache *c, int node)
static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
	/* Allocate, but don't deplete atomic reserves that typical
	 * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
	 * will allocate from the current numa node which is what we
	 * want here.
	 */
	gfp_t flags = GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT;

	if (c->percpu_size) {
		void **obj = kmalloc_node(c->percpu_size, flags, node);
		void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
@@ -185,7 +178,12 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
		 */
		obj = __llist_del_first(&c->free_by_rcu);
		if (!obj) {
			obj = __alloc(c, node);
			/* Allocate, but don't deplete atomic reserves that typical
			 * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc
			 * will allocate from the current numa node which is what we
			 * want here.
			 */
			obj = __alloc(c, node, GFP_NOWAIT | __GFP_NOWARN | __GFP_ACCOUNT);
			if (!obj)
				break;
		}
@@ -676,3 +674,46 @@ void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr)

	unit_free(this_cpu_ptr(ma->cache), ptr);
}

/* Hand 'ptr' straight to kfree(): it is neither recycled onto the
 * free_llist nor deferred behind a rcu_tasks_trace gp here.
 * Waiting for that rcu_tasks_trace gp is the caller's job and must
 * have happened before bpf_mem_cache_raw_free() is called.
 * Intended for rcu_tasks_trace callbacks that no longer hold the
 * bpf_mem_alloc object that originally allocated 'ptr'.
 * Keep this to uncommon code paths: memory released this way never
 * refills the bpf_mem_alloc's free_llist, which may affect performance.
 */
void bpf_mem_cache_raw_free(void *ptr)
{
	/* The guard is required: a NULL 'ptr' must not be offset and freed. */
	if (ptr)
		kfree(ptr - LLIST_NODE_SZ);
}

/* Allocate one element from this CPU's cache of 'ma'.
 * unit_alloc() is tried first. Only if it comes back empty and the caller
 * passed exactly GFP_KERNEL -- the signal that using kmalloc here will not
 * cause a deadlock -- is a new object requested through __alloc(), with
 * the memcg returned by get_memcg() made active around the call.
 * Returns the address just past the LLIST_NODE_SZ header, or NULL.
 */
void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
{
	struct bpf_mem_cache *cache = this_cpu_ptr(ma->cache);
	struct mem_cgroup *memcg, *saved_memcg;
	void *obj;

	obj = unit_alloc(cache);
	if (obj)
		return obj + LLIST_NODE_SZ;

	/* Any other gfp value means no direct kmalloc fallback. */
	if (flags != GFP_KERNEL)
		return NULL;

	memcg = get_memcg(cache);
	saved_memcg = set_active_memcg(memcg);
	obj = __alloc(cache, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
	set_active_memcg(saved_memcg);
	mem_cgroup_put(memcg);

	return obj ? obj + LLIST_NODE_SZ : NULL;
}