Commit 1705c62e authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Sleepable local storage'



KP Singh says:

====================

Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs to be used later in the life-cyle of the object.

Sometimes this context can only be gathered from sleepable programs
(because it needs accesing __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs allows such context to be managed with the benefits of
local_storage.

# v2 -> v3

* Fixed some RCU issues pointed by Martin
* Added Martin's ack

# v1 -> v2

* Generalize RCU checks (will send a separate patch for updating
  non local storage code where this can be used).
* Add missing RCU lock checks from v1
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 3ccdcee2 0ae6eff2
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@

#define BPF_LOCAL_STORAGE_CACHE_SIZE	16

#define bpf_rcu_lock_held()                                                    \
	(rcu_read_lock_held() || rcu_read_lock_trace_held() ||                 \
	 rcu_read_lock_bh_held())
struct bpf_local_storage_map_bucket {
	struct hlist_head list;
	raw_spinlock_t lock;
@@ -162,4 +165,6 @@ struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags);

void bpf_local_storage_free_rcu(struct rcu_head *rcu);

#endif /* _BPF_LOCAL_STORAGE_H */
+5 −1
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(inode_cache);

@@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
	if (!bsb)
		return NULL;

	inode_storage = rcu_dereference(bsb->storage);
	inode_storage =
		rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
	if (!inode_storage)
		return NULL;

@@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
		return (unsigned long)NULL;

@@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
BPF_CALL_2(bpf_inode_storage_delete,
	   struct bpf_map *, map, struct inode *, inode)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!inode)
		return -EINVAL;

+37 −13
Original line number Diff line number Diff line
@@ -11,6 +11,9 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>

#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)

@@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
	return NULL;
}

void bpf_local_storage_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage *local_storage;

	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
	kfree_rcu(local_storage, rcu);
}

static void bpf_selem_free_rcu(struct rcu_head *rcu)
{
	struct bpf_local_storage_elem *selem;

	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
	kfree_rcu(selem, rcu);
}

/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
@@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference(SDATA(selem)->smap);
	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
@@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(),  it is more intutivie to
		 * read if kfree_rcu(local_storage, rcu) is done
		 * read if the freeing of the storage is done
		 * after the raw_spin_unlock_bh(&local_storage->lock).
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller which then calls the kfree_rcu()
		 * after unlock.
		 * to the caller which then calls then frees the storage after
		 * all the RCU grace periods have expired.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
@@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	kfree_rcu(selem, rcu);

	call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
	return free_local_storage;
}

@@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
		/* selem has already been unlinked from sk */
		return;

	local_storage = rcu_dereference(selem->local_storage);
	local_storage = rcu_dereference_check(selem->local_storage,
					      bpf_rcu_lock_held());
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (likely(selem_linked_to_storage(selem)))
		free_local_storage = bpf_selem_unlink_storage_nolock(
@@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	if (free_local_storage)
		kfree_rcu(local_storage, rcu);
		call_rcu_tasks_trace(&local_storage->rcu,
				     bpf_local_storage_free_rcu);
}

void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
		/* selem has already be unlinked from smap */
		return;

	smap = rcu_dereference(SDATA(selem)->smap);
	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	b = select_bucket(smap, selem);
	raw_spin_lock_irqsave(&b->lock, flags);
	if (likely(selem_linked_to_map(selem)))
@@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
	struct bpf_local_storage_elem *selem;

	/* Fast path (cache hit) */
	sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
				      bpf_rcu_lock_held());
	if (sdata && rcu_access_pointer(sdata->smap) == smap)
		return sdata;

	/* Slow path (cache miss) */
	hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
				  rcu_read_lock_trace_held())
		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
			break;

@@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
		 * bucket->list, first_selem can be freed immediately
		 * (instead of kfree_rcu) because
		 * bpf_local_storage_map_free() does a
		 * synchronize_rcu() before walking the bucket->list.
		 * synchronize_rcu_mult (waiting for both sleepable and
		 * normal programs) before walking the bucket->list.
		 * Hence, no one is accessing selem from the
		 * bucket->list under rcu_read_lock().
		 */
@@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference(*owner_storage(smap, owner));
	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
+5 −1
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
#include <linux/rcupdate_trace.h>

DEFINE_BPF_STORAGE_CACHE(task_cache);

@@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
	struct bpf_local_storage *task_storage;
	struct bpf_local_storage_map *smap;

	task_storage = rcu_dereference(task->bpf_storage);
	task_storage =
		rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
	if (!task_storage)
		return NULL;

@@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
		return (unsigned long)NULL;

@@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
{
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!task)
		return -EINVAL;

+3 −0
Original line number Diff line number Diff line
@@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
			}
			break;
		case BPF_MAP_TYPE_RINGBUF:
		case BPF_MAP_TYPE_INODE_STORAGE:
		case BPF_MAP_TYPE_SK_STORAGE:
		case BPF_MAP_TYPE_TASK_STORAGE:
			break;
		default:
			verbose(env,
Loading