Commit 8d275960 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf: Use bpf_mem_cache_alloc/free in bpf_local_storage'

Martin KaFai Lau says:

====================

From: Martin KaFai Lau <martin.lau@kernel.org>

This set is a continuation of the effort in using
bpf_mem_cache_alloc/free in bpf_local_storage [1]

Major change is only using bpf_mem_alloc for task and cgrp storage
while sk and inode stay with kzalloc/kfree. The details are
in patch 2.

[1]: https://lore.kernel.org/bpf/20230308065936.1550103-1-martin.lau@linux.dev/



v3:
- Only use bpf_mem_alloc for task and cgrp storage.
- sk and inode storage stay with kzalloc/kfree.
- Check NULL and add comments in bpf_mem_cache_raw_free() in patch 1.
- Added test and benchmark for task storage.

v2:
- Added bpf_mem_cache_alloc_flags() and bpf_mem_cache_raw_free()
  to hide the internal data structure of the bpf allocator.
- Fixed a typo bug in bpf_selem_free()
- Simplified the test_local_storage test by directly using
  err returned from libbpf
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e9936076 cbe9d93d
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>

#define BPF_LOCAL_STORAGE_CACHE_SIZE	16
@@ -55,6 +56,9 @@ struct bpf_local_storage_map {
	u32 bucket_log;
	u16 elem_size;
	u16 cache_idx;
	struct bpf_mem_alloc selem_ma;
	struct bpf_mem_alloc storage_ma;
	bool bpf_ma;
};

struct bpf_local_storage_data {
@@ -122,7 +126,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);

struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
			    struct bpf_local_storage_cache *cache);
			    struct bpf_local_storage_cache *cache,
			    bool bpf_ma);

struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+2 −0
Original line number Diff line number Diff line
@@ -31,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
/* kmem_cache_alloc/free equivalent: */
void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma);
void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr);
void bpf_mem_cache_raw_free(void *ptr);
void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags);

#endif /* _BPF_MEM_ALLOC_H */
+1 −1
Original line number Diff line number Diff line
@@ -149,7 +149,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)

static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &cgroup_cache);
	return bpf_local_storage_map_alloc(attr, &cgroup_cache, true);
}

static void cgroup_storage_map_free(struct bpf_map *map)
+1 −1
Original line number Diff line number Diff line
@@ -199,7 +199,7 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,

static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &inode_cache);
	return bpf_local_storage_map_alloc(attr, &inode_cache, false);
}

static void inode_storage_map_free(struct bpf_map *map)
+200 −23
Original line number Diff line number Diff line
@@ -80,8 +80,24 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
		return NULL;

	if (smap->bpf_ma) {
		migrate_disable();
		selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
		migrate_enable();
		if (selem)
			/* Keep the original bpf_map_kzalloc behavior
			 * before started using the bpf_mem_cache_alloc.
			 *
			 * No need to use zero_map_value. The bpf_selem_free()
			 * only does bpf_mem_cache_free when
			 * no other bpf prog is using the selem.
			 */
			memset(SDATA(selem)->data, 0, smap->map.value_size);
	} else {
		selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
					gfp_flags | __GFP_NOWARN);
	}

	if (selem) {
		if (value)
			copy_map_value(&smap->map, SDATA(selem)->data, value);
@@ -95,41 +111,104 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
	return NULL;
}

static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
/* rcu tasks trace callback for bpf_ma == false */
static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	/* If RCU Tasks Trace grace period implies RCU grace period, do
	 * kfree(), else do kfree_rcu().
	 */
	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	if (rcu_trace_implies_rcu_gp())
		kfree(local_storage);
	else
		kfree_rcu(local_storage, rcu);
}

static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	bpf_mem_cache_raw_free(local_storage);
}

static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
	/* If RCU Tasks Trace grace period implies RCU grace period, do
	 * kfree(), else do kfree_rcu().
	 */
	if (rcu_trace_implies_rcu_gp())
		bpf_local_storage_free_rcu(rcu);
	else
		call_rcu(rcu, bpf_local_storage_free_rcu);
}

/* Handle bpf_ma == false */
static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
				     bool vanilla_rcu)
{
	if (vanilla_rcu)
		kfree_rcu(local_storage, rcu);
	else
		call_rcu_tasks_trace(&local_storage->rcu,
				     __bpf_local_storage_free_trace_rcu);
}

static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
				   bool reuse_now)
				   struct bpf_local_storage_map *smap,
				   bool bpf_ma, bool reuse_now)
{
	if (!reuse_now)
	if (!bpf_ma) {
		__bpf_local_storage_free(local_storage, reuse_now);
		return;
	}

	if (!reuse_now) {
		call_rcu_tasks_trace(&local_storage->rcu,
				     bpf_local_storage_free_trace_rcu);
	else
		return;
	}

	if (smap) {
		migrate_disable();
		bpf_mem_cache_free(&smap->storage_ma, local_storage);
		migrate_enable();
	} else {
		/* smap could be NULL if the selem that triggered
		 * this 'local_storage' creation had been long gone.
		 * In this case, directly do call_rcu().
		 */
		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
	}
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* rcu tasks trace callback for bpf_ma == false */
static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	if (rcu_trace_implies_rcu_gp())
		kfree(selem);
	else
		kfree_rcu(selem, rcu);
}

/* Handle bpf_ma == false */
static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
			     bool vanilla_rcu)
{
	if (vanilla_rcu)
		kfree_rcu(selem, rcu);
	else
		call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	bpf_mem_cache_raw_free(selem);
}

static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
@@ -145,10 +224,23 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
		    bool reuse_now)
{
	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
	if (!reuse_now)

	if (!smap->bpf_ma) {
		__bpf_selem_free(selem, reuse_now);
		return;
	}

	if (!reuse_now) {
		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
	else
		call_rcu(&selem->rcu, bpf_selem_free_rcu);
	} else {
		/* Instead of using the vanilla call_rcu(),
		 * bpf_mem_cache_free will be able to reuse selem
		 * immediately.
		 */
		migrate_disable();
		bpf_mem_cache_free(&smap->selem_ma, selem);
		migrate_enable();
	}
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -209,11 +301,47 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
	return free_local_storage;
}

static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage,
				 struct bpf_local_storage_map *storage_smap,
				 struct bpf_local_storage_elem *selem)
{

	struct bpf_local_storage_map *selem_smap;

	/* local_storage->smap may be NULL. If it is, get the bpf_ma
	 * from any selem in the local_storage->list. The bpf_ma of all
	 * local_storage and selem should have the same value
	 * for the same map type.
	 *
	 * If the local_storage->list is already empty, the caller will not
	 * care about the bpf_ma value also because the caller is not
	 * responsible to free the local_storage.
	 */

	if (storage_smap)
		return storage_smap->bpf_ma;

	if (!selem) {
		struct hlist_node *n;

		n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
					  bpf_rcu_lock_held());
		if (!n)
			return false;

		selem = hlist_entry(n, struct bpf_local_storage_elem, snode);
	}
	selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());

	return selem_smap->bpf_ma;
}

static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
				     bool reuse_now)
{
	struct bpf_local_storage_map *storage_smap;
	struct bpf_local_storage *local_storage;
	bool free_local_storage = false;
	bool bpf_ma, free_local_storage = false;
	unsigned long flags;

	if (unlikely(!selem_linked_to_storage_lockless(selem)))
@@ -222,6 +350,10 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,

	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());
	storage_smap = rcu_dereference_check(local_storage->smap,
					     bpf_rcu_lock_held());
	bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem);

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (likely(selem_linked_to_storage(selem)))
		free_local_storage = bpf_selem_unlink_storage_nolock(
@@ -229,7 +361,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	if (free_local_storage)
		bpf_local_storage_free(local_storage, reuse_now);
		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -349,8 +481,15 @@ int bpf_local_storage_alloc(void *owner,
	if (err)
		return err;

	if (smap->bpf_ma) {
		migrate_disable();
		storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
		migrate_enable();
	} else {
		storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
					  gfp_flags | __GFP_NOWARN);
	}

	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
@@ -396,7 +535,7 @@ int bpf_local_storage_alloc(void *owner,
	return 0;

uncharge:
	bpf_local_storage_free(storage, true);
	bpf_local_storage_free(storage, smap, smap->bpf_ma, true);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}
@@ -609,11 +748,15 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,

void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
	struct bpf_local_storage_map *storage_smap;
	struct bpf_local_storage_elem *selem;
	bool free_storage = false;
	bool bpf_ma, free_storage = false;
	struct hlist_node *n;
	unsigned long flags;

	storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
	bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);

	/* Neither the bpf_prog nor the bpf_map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added to or deleted from the
@@ -641,7 +784,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	if (free_storage)
		bpf_local_storage_free(local_storage, true);
		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
}

u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -654,13 +797,25 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
	return usage;
}

/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
 * A deadlock free allocator is useful for storage that the bpf prog can easily
 * get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf.
 * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses
 * memory immediately. To be reuse-immediate safe, the owner destruction
 * code path needs to go through a rcu grace period before calling
 * bpf_local_storage_destroy().
 *
 * When bpf_ma == false, the kmalloc and kfree are used.
 */
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
			    struct bpf_local_storage_cache *cache)
			    struct bpf_local_storage_cache *cache,
			    bool bpf_ma)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;
	int err;

	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
	if (!smap)
@@ -675,8 +830,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
					 nbuckets, GFP_USER | __GFP_NOWARN);
	if (!smap->buckets) {
		bpf_map_area_free(smap);
		return ERR_PTR(-ENOMEM);
		err = -ENOMEM;
		goto free_smap;
	}

	for (i = 0; i < nbuckets; i++) {
@@ -687,8 +842,26 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
	smap->elem_size = offsetof(struct bpf_local_storage_elem,
				   sdata.data[attr->value_size]);

	smap->bpf_ma = bpf_ma;
	if (bpf_ma) {
		err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
		if (err)
			goto free_smap;

		err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false);
		if (err) {
			bpf_mem_alloc_destroy(&smap->selem_ma);
			goto free_smap;
		}
	}

	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
	return &smap->map;

free_smap:
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

void bpf_local_storage_map_free(struct bpf_map *map,
@@ -754,6 +927,10 @@ void bpf_local_storage_map_free(struct bpf_map *map,
	 */
	synchronize_rcu();

	if (smap->bpf_ma) {
		bpf_mem_alloc_destroy(&smap->selem_ma);
		bpf_mem_alloc_destroy(&smap->storage_ma);
	}
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
}
Loading