Commit 5556baca authored by Andrii Nakryiko's avatar Andrii Nakryiko
Browse files

Merge branch 'Add lookup_and_delete_elem support to BPF hash map types'



Denis Salopek says:

====================

This patch series extends the existing bpf_map_lookup_and_delete_elem()
functionality with 4 more map types:
 - BPF_MAP_TYPE_HASH,
 - BPF_MAP_TYPE_PERCPU_HASH,
 - BPF_MAP_TYPE_LRU_HASH and
 - BPF_MAP_TYPE_LRU_PERCPU_HASH.

Patch 1 adds most of its functionality and logic as well as
documentation.

As it was previously limited to only stacks and queues which do not
support the BPF_F_LOCK flag, patch 2 enables its usage by adding a new
libbpf API bpf_map_lookup_and_delete_elem_flags() based on the existing
bpf_map_lookup_elem_flags().

Patch 3 adds selftests for lookup_and_delete_elem().

Changes in patch 1:
v7: Minor formating nits, add Acked-by.
v6: Remove unneeded flag check, minor code/format fixes.
v5: Split patch to 3 patches. Extend BPF_MAP_LOOKUP_AND_DELETE_ELEM
documentation with this changes.
v4: Fix the return value for unsupported map types.
v3: Add bpf_map_lookup_and_delete_elem_flags() and enable BPF_F_LOCK
flag, change CHECKs to ASSERT_OKs, initialize variables to 0.
v2: Add functionality for LRU/per-CPU, add test_progs tests.

Changes in patch 2:
v7: No change.
v6: Add Acked-by.
v5: Move to the newest libbpf version (0.4.0).

Changes in patch 3:
v7: Remove ASSERT_GE macro which is already added in some other commit,
change ASSERT_OK to ASSERT_OK_PTR, add Acked-by.
v6: Remove PERCPU macros, add ASSERT_GE macro to test_progs.h, remove
leftover code.
v5: Use more appropriate macros. Better check for changed value.
====================

Signed-off-by: default avatarAndrii Nakryiko <andrii@kernel.org>
parents f9bceaa5 49c299b6
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -70,6 +70,8 @@ struct bpf_map_ops {
	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
				union bpf_attr __user *uattr);
	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
					  void *value, u64 flags);
	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
					   const union bpf_attr *attr,
					   union bpf_attr __user *uattr);
+13 −0
Original line number Diff line number Diff line
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
 *		Look up an element with the given *key* in the map referred to
 *		by the file descriptor *fd*, and if found, delete the element.
 *
 *		For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
 *		types, the *flags* argument needs to be set to 0, but for other
 *		map types, it may be specified as:
 *
 *		**BPF_F_LOCK**
 *			Look up and delete the value of a spin-locked map
 *			without returning the lock. This must be specified if
 *			the elements contain a spinlock.
 *
 *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
 *		implement this command as a "pop" operation, deleting the top
 *		element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
 *		This command is only valid for the following map types:
 *		* **BPF_MAP_TYPE_QUEUE**
 *		* **BPF_MAP_TYPE_STACK**
 *		* **BPF_MAP_TYPE_HASH**
 *		* **BPF_MAP_TYPE_PERCPU_HASH**
 *		* **BPF_MAP_TYPE_LRU_HASH**
 *		* **BPF_MAP_TYPE_LRU_PERCPU_HASH**
 *
 *	Return
 *		Returns zero on success. On error, -1 is returned and *errno*
+98 −0
Original line number Diff line number Diff line
@@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
	rcu_read_unlock();
}

static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	unsigned long bflags;
	struct htab_elem *l;
	u32 hash, key_size;
	struct bucket *b;
	int ret;

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, hash, &bflags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (!l) {
		ret = -ENOENT;
	} else {
		if (is_percpu) {
			u32 roundup_value_size = round_up(map->value_size, 8);
			void __percpu *pptr;
			int off = 0, cpu;

			pptr = htab_elem_get_ptr(l, key_size);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(value + off,
						per_cpu_ptr(pptr, cpu),
						roundup_value_size);
				off += roundup_value_size;
			}
		} else {
			u32 roundup_key_size = round_up(map->key_size, 8);

			if (flags & BPF_F_LOCK)
				copy_map_value_locked(map, value, l->key +
						      roundup_key_size,
						      true);
			else
				copy_map_value(map, value, l->key +
					       roundup_key_size);
			check_and_init_map_lock(map, value);
		}

		hlist_nulls_del_rcu(&l->hash_node);
		if (!is_lru_map)
			free_htab_elem(htab, l);
	}

	htab_unlock_bucket(htab, b, hash, bflags);

	if (is_lru_map && l)
		bpf_lru_push_free(&htab->lru, &l->lru_node);

	return ret;
}

static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
						 flags);
}

static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						  void *key, void *value,
						  u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
						 flags);
}

static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
						 flags);
}

static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						      void *key, void *value,
						      u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
						 flags);
}

static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
				   const union bpf_attr *attr,
@@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
	.map_update_elem = htab_lru_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_lru_percpu_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+30 −4
Original line number Diff line number Diff line
@@ -1483,7 +1483,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
	return err;
}

#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags

static int map_lookup_and_delete_elem(union bpf_attr *attr)
{
@@ -1499,6 +1499,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
@@ -1509,24 +1512,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
		goto err_put;
	}

	if (attr->flags &&
	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
	     map->map_type == BPF_MAP_TYPE_STACK)) {
		err = -EINVAL;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = map->value_size;
	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -ENOTSUPP;
	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
	    map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_pop_elem(map, value);
	} else {
		err = -ENOTSUPP;
	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		if (!bpf_map_is_dev_bound(map)) {
			bpf_disable_instrumentation();
			rcu_read_lock();
			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
			rcu_read_unlock();
			bpf_enable_instrumentation();
		}
	}

	if (err)
+13 −0
Original line number Diff line number Diff line
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
 *		Look up an element with the given *key* in the map referred to
 *		by the file descriptor *fd*, and if found, delete the element.
 *
 *		For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
 *		types, the *flags* argument needs to be set to 0, but for other
 *		map types, it may be specified as:
 *
 *		**BPF_F_LOCK**
 *			Look up and delete the value of a spin-locked map
 *			without returning the lock. This must be specified if
 *			the elements contain a spinlock.
 *
 *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
 *		implement this command as a "pop" operation, deleting the top
 *		element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
 *		This command is only valid for the following map types:
 *		* **BPF_MAP_TYPE_QUEUE**
 *		* **BPF_MAP_TYPE_STACK**
 *		* **BPF_MAP_TYPE_HASH**
 *		* **BPF_MAP_TYPE_PERCPU_HASH**
 *		* **BPF_MAP_TYPE_LRU_HASH**
 *		* **BPF_MAP_TYPE_LRU_PERCPU_HASH**
 *
 *	Return
 *		Returns zero on success. On error, -1 is returned and *errno*
Loading