Commit c9ae8c96 authored by Martin KaFai Lau
Browse files

Merge branch 'fixes for concurrent htab updates'

Hou Tao says:

====================

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the issues found during investigating the
syzkaller problem reported in [0]. It seems that the concurrent updates
to the same hash-table bucket may fail as shown in patch 1.

Patch 1 uses preempt_disable() to fix the problem for
htab_use_raw_lock() case. For !htab_use_raw_lock() case, the problem is
left to "BPF specific memory allocator" patchset [1] in which
!htab_use_raw_lock() will be removed.

Patch 2 fixes the out-of-bound memory read problem reported in [0]. The
problem has the same root cause as patch 1 and it is fixed by handling
-EBUSY from htab_lock_bucket() correctly.

Patch 3 adds two test cases for hash-table update: one for the reentrancy
of bpf_map_update_elem(), and another one for concurrent updates of the
same hash-table bucket.

Comments are always welcome.

Regards,
Tao

[0]: https://lore.kernel.org/bpf/CACkBjsbuxaR6cv0kXJoVnBfL9ZJXjjoUcMpw_Ogc313jSrg14A@mail.gmail.com/
[1]: https://lore.kernel.org/bpf/20220819214232.18784-1-alexei.starovoitov@gmail.com/

Change Log:

v4:
 * rebased on bpf-next
 * add htab_update to DENYLIST.s390x

v3: https://lore.kernel.org/bpf/20220829023709.1958204-1-houtao@huaweicloud.com/
 * patch 1: update commit message and add Fixes tag
 * patch 2: add Fixes tag
 * patch 3: elaborate the description of test cases

v2: https://lore.kernel.org/bpf/bd60ef93-1c6a-2db2-557d-b09b92ad22bd@huaweicloud.com/
 * Note the fix is for CONFIG_PREEMPT case in commit message and add
   Reviewed-by tag for patch 1
 * Drop patch "bpf: Allow normally concurrent map updates for !htab_use_raw_lock() case"

v1: https://lore.kernel.org/bpf/20220821033223.2598791-1-houtao@huaweicloud.com/


====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 19707294 1c636b62
Loading
Loading
Loading
Loading
+23 −7
Original line number Diff line number Diff line
@@ -162,17 +162,25 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
				   unsigned long *pflags)
{
	unsigned long flags;
	bool use_raw_lock;

	hash = hash & HASHTAB_MAP_LOCK_MASK;

	use_raw_lock = htab_use_raw_lock(htab);
	if (use_raw_lock)
		preempt_disable();
	else
		migrate_disable();
	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
		__this_cpu_dec(*(htab->map_locked[hash]));
		if (use_raw_lock)
			preempt_enable();
		else
			migrate_enable();
		return -EBUSY;
	}

	if (htab_use_raw_lock(htab))
	if (use_raw_lock)
		raw_spin_lock_irqsave(&b->raw_lock, flags);
	else
		spin_lock_irqsave(&b->lock, flags);
@@ -185,12 +193,17 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b, u32 hash,
				      unsigned long flags)
{
	bool use_raw_lock = htab_use_raw_lock(htab);

	hash = hash & HASHTAB_MAP_LOCK_MASK;
	if (htab_use_raw_lock(htab))
	if (use_raw_lock)
		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
	else
		spin_unlock_irqrestore(&b->lock, flags);
	__this_cpu_dec(*(htab->map_locked[hash]));
	if (use_raw_lock)
		preempt_enable();
	else
		migrate_enable();
}

@@ -1691,8 +1704,11 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
	/* do not grab the lock unless need it (bucket_cnt > 0). */
	if (locked) {
		ret = htab_lock_bucket(htab, b, batch, &flags);
		if (ret)
			goto next_batch;
		if (ret) {
			rcu_read_unlock();
			bpf_enable_instrumentation();
			goto after_loop;
		}
	}

	bucket_cnt = 0;
+1 −0
Original line number Diff line number Diff line
@@ -68,3 +68,4 @@ unpriv_bpf_disabled # fentry
setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
+126 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <sched.h>
#include <stdbool.h>
#include <test_progs.h>
#include "htab_update.skel.h"

/* Shared state handed to every updater thread in the concurrent test. */
struct htab_update_ctx {
	int fd;		/* fd of the hash map under test */
	int loop;	/* upper bound on updates attempted per thread */
	bool stop;	/* set by a thread on error so the others bail out */
};

/* Verify that a bpf_map_update_elem() re-entered from a fentry prog
 * attached to lookup_elem_raw() fails with -EBUSY instead of deadlocking
 * or corrupting the bucket state.
 */
static void test_reenter_update(void)
{
	unsigned int key = 0, value = 0;
	struct htab_update *skel;
	int ret;

	skel = htab_update__open();
	if (!ASSERT_OK_PTR(skel, "htab_update__open"))
		return;

	/* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
	bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
	ret = htab_update__load(skel);
	if (!ASSERT_TRUE(!ret || ret == -ESRCH, "htab_update__load") || ret)
		goto out;

	skel->bss->pid = getpid();
	ret = htab_update__attach(skel);
	if (!ASSERT_OK(ret, "htab_update__attach"))
		goto out;

	/* This update runs lookup_elem_raw() in the kernel, which fires the
	 * fentry prog and re-enters bpf_map_update_elem() on the same map.
	 */
	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
	if (!ASSERT_OK(ret, "add element"))
		goto out;

	ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
out:
	htab_update__destroy(skel);
}

/* Updater thread body: hammer the one-entry map with updates until the
 * loop budget is exhausted or another thread has flagged a failure.
 * Returns the first failing errno-style code cast to void *, else NULL.
 */
static void *htab_update_thread(void *arg)
{
	struct htab_update_ctx *ctx = arg;
	cpu_set_t cpuset;
	int iter;

	/* Pin every updater on CPU 0 so all of them contend on the same
	 * per-CPU map_locked counter in the kernel.
	 */
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);

	for (iter = 0; iter < ctx->loop && !ctx->stop; iter++) {
		unsigned int key = 0, value = 0;
		int ret;

		ret = bpf_map_update_elem(ctx->fd, &key, &value, 0);
		if (ret) {
			ctx->stop = true;
			return (void *)(long)ret;
		}
	}

	return NULL;
}

/* Verify that concurrent updates of the same hash-table bucket all
 * succeed: with the htab_lock_bucket() fix, a contended update retries
 * via -EBUSY handling instead of failing back to user space.
 */
static void test_concurrent_update(void)
{
	struct htab_update_ctx ctx;
	struct htab_update *skel;
	unsigned int i, nr;
	pthread_t *tids;
	int err;

	skel = htab_update__open_and_load();
	if (!ASSERT_OK_PTR(skel, "htab_update__open_and_load"))
		return;

	ctx.fd = bpf_map__fd(skel->maps.htab);
	ctx.loop = 1000;
	/* NOTE(review): ctx.stop is written and read concurrently through
	 * plain accesses; strictly a data race — an atomic_bool would be
	 * cleaner, though for this best-effort early-exit flag it is benign.
	 */
	ctx.stop = false;

	nr = 4;
	tids = calloc(nr, sizeof(*tids));
	if (!ASSERT_NEQ(tids, NULL, "no mem"))
		goto out;

	for (i = 0; i < nr; i++) {
		err = pthread_create(&tids[i], NULL, htab_update_thread, &ctx);
		if (!ASSERT_OK(err, "pthread_create")) {
			unsigned int j;

			/* Stop and reap the threads already started. */
			ctx.stop = true;
			for (j = 0; j < i; j++)
				pthread_join(tids[j], NULL);
			goto out;
		}
	}

	for (i = 0; i < nr; i++) {
		void *thread_err = NULL;

		pthread_join(tids[i], &thread_err);
		ASSERT_EQ(thread_err, NULL, "update error");
	}

out:
	/* free(NULL) is a no-op, so no guard is needed. */
	free(tids);
	htab_update__destroy(skel);
}

/* Entry point: run the reentrancy and the concurrent-update subtests. */
void test_htab_update(void)
{
	if (test__start_subtest("reenter_update"))
		test_reenter_update();
	if (test__start_subtest("concurrent_update"))
		test_concurrent_update();
}
+29 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* One-entry hash map updated both from user space and from the fentry
 * prog below, so both paths hit the same bucket.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} htab SEC(".maps");

int pid = 0;		/* set by user space: only react to its own updates */
int update_err = 0;	/* result of the nested update, checked by user space */

/* fentry prog on the kernel's lookup_elem_raw(): re-enters
 * bpf_map_update_elem() on the same map while an update from user space
 * is in flight, recording the (expected -EBUSY) result in update_err.
 * The "?" prefix makes the prog non-autoloaded; the test enables it
 * explicitly because lookup_elem_raw() may be inlined on some kernels.
 */
SEC("?fentry/lookup_elem_raw")
int lookup_elem_raw(void *ctx)
{
	__u32 key = 0, value = 1;

	/* Ignore triggers from processes other than the test itself. */
	if ((bpf_get_current_pid_tgid() >> 32) != pid)
		return 0;

	update_err = bpf_map_update_elem(&htab, &key, &value, 0);
	return 0;
}