Commit b00fa38a authored by Joanne Koong, committed by Alexei Starovoitov
Browse files

bpf: Enable non-atomic allocations in local storage



Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-2-joannekoong@fb.com
parent a8fee962
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);

struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
		bool charge_mem);
		bool charge_mem, gfp_t gfp_flags);

int
bpf_local_storage_alloc(void *owner,
			struct bpf_local_storage_map *smap,
			struct bpf_local_storage_elem *first_selem);
			struct bpf_local_storage_elem *first_selem,
			gfp_t gfp_flags);

struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags);
			 void *value, u64 map_flags, gfp_t gfp_flags);

void bpf_local_storage_free_rcu(struct rcu_head *rcu);

+5 −4
Original line number Diff line number Diff line
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,

	sdata = bpf_local_storage_update(f->f_inode,
					 (struct bpf_local_storage_map *)map,
					 value, map_flags);
					 value, map_flags, GFP_ATOMIC);
	fput(f);
	return PTR_ERR_OR_ZERO(sdata);
}
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
	return err;
}

BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
	   void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
		sdata = bpf_local_storage_update(
			inode, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST);
			BPF_NOEXIST, gfp_flags);
		return IS_ERR(sdata) ? (unsigned long)NULL :
					     (unsigned long)sdata->data;
	}
+37 −21
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)

struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		void *value, bool charge_mem)
		void *value, bool charge_mem, gfp_t gfp_flags)
{
	struct bpf_local_storage_elem *selem;

@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		return NULL;

	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
				GFP_ATOMIC | __GFP_NOWARN);
				gfp_flags | __GFP_NOWARN);
	if (selem) {
		if (value)
			memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,

int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem)
			    struct bpf_local_storage_elem *first_selem,
			    gfp_t gfp_flags)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
		return err;

	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
				  GFP_ATOMIC | __GFP_NOWARN);
				  gfp_flags | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
@@ -350,10 +351,10 @@ int bpf_local_storage_alloc(void *owner,
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags)
			 void *value, u64 map_flags, gfp_t gfp_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_elem *selem = NULL;
	struct bpf_local_storage *local_storage;
	unsigned long flags;
	int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);

	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, true);
		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem);
		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
		if (err) {
			kfree(selem);
			mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		}
	}

	if (gfp_flags == GFP_KERNEL) {
		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);
	}

	raw_spin_lock_irqsave(&local_storage->lock, flags);

	/* Recheck local_storage->list under local_storage->lock */
@@ -429,6 +439,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		goto unlock;
	}

	if (gfp_flags != GFP_KERNEL) {
		/* local_storage->lock is held.  Hence, we are sure
		 * we can unlink and uncharge the old_sdata successfully
		 * later.  Hence, instead of charging the new selem now
@@ -438,11 +449,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		 * old_sdata will not be uncharged later during
		 * bpf_selem_unlink_storage_nolock().
		 */
	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
		if (!selem) {
			err = -ENOMEM;
			goto unlock_err;
		}
	}

	/* First, link the new selem to the map */
	bpf_selem_link_map(smap, selem);
@@ -463,6 +475,10 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,

unlock_err:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	if (selem) {
		mem_uncharge(smap, owner, smap->elem_size);
		kfree(selem);
	}
	return ERR_PTR(err);
}

+6 −4
Original line number Diff line number Diff line
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,

	bpf_task_storage_lock();
	sdata = bpf_local_storage_update(
		task, (struct bpf_local_storage_map *)map, value, map_flags);
		task, (struct bpf_local_storage_map *)map, value, map_flags,
		GFP_ATOMIC);
	bpf_task_storage_unlock();

	err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
	return err;
}

BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags)
/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
		sdata = bpf_local_storage_update(
			task, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST);
			BPF_NOEXIST, gfp_flags);

unlock:
	bpf_task_storage_unlock();
+20 −0
Original line number Diff line number Diff line
@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
			goto patch_call_imm;
		}

		if (insn->imm == BPF_FUNC_task_storage_get ||
		    insn->imm == BPF_FUNC_sk_storage_get ||
		    insn->imm == BPF_FUNC_inode_storage_get) {
			if (env->prog->aux->sleepable)
				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
			else
				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
			insn_buf[1] = *insn;
			cnt = 2;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			goto patch_call_imm;
		}

		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * and other inlining handlers are currently limited to 64 bit
		 * only.
Loading