Commit 46565696 authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov
Browse files

selftests/bpf: Add test for race in btf_try_get_module



This adds a complete test case to ensure we never take references to
modules not in MODULE_STATE_LIVE, which can lead to UAF, and it also
ensures we never access btf->kfunc_set_tab in an inconsistent state.

The test uses userfaultfd to artificially widen the race.

When run on an unpatched kernel, it leads to the following splat:

[root@(none) bpf]# ./test_progs -t bpf_mod_race/ksym
[   55.498171] BUG: unable to handle page fault for address: fffffbfff802548b
[   55.499206] #PF: supervisor read access in kernel mode
[   55.499855] #PF: error_code(0x0000) - not-present page
[   55.500555] PGD a4fa9067 P4D a4fa9067 PUD a4fa5067 PMD 1b44067 PTE 0
[   55.501499] Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[   55.502195] CPU: 0 PID: 83 Comm: kworker/0:2 Tainted: G           OE     5.16.0-rc4+ #151
[   55.503388] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014
[   55.504777] Workqueue: events bpf_prog_free_deferred
[   55.505563] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.509140] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.509977] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.511096] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.512143] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.513228] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.514332] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.515418] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.516705] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.517560] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.518672] PKRU: 55555554
[   55.519022] Call Trace:
[   55.519483]  <TASK>
[   55.519884]  module_put.part.0+0x2a/0x180
[   55.520642]  bpf_prog_free_deferred+0x129/0x2e0
[   55.521478]  process_one_work+0x4fa/0x9e0
[   55.522122]  ? pwq_dec_nr_in_flight+0x100/0x100
[   55.522878]  ? rwlock_bug.part.0+0x60/0x60
[   55.523551]  worker_thread+0x2eb/0x700
[   55.524176]  ? __kthread_parkme+0xd8/0xf0
[   55.524853]  ? process_one_work+0x9e0/0x9e0
[   55.525544]  kthread+0x23a/0x270
[   55.526088]  ? set_kthread_struct+0x80/0x80
[   55.526798]  ret_from_fork+0x1f/0x30
[   55.527413]  </TASK>
[   55.527813] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod]
[   55.530846] CR2: fffffbfff802548b
[   55.531341] ---[ end trace 1af41803c054ad6d ]---
[   55.532136] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.535887] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.536711] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.537821] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.538899] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.539928] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.541021] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.542108] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.543260] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.544136] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.545317] PKRU: 55555554
[   55.545671] note: kworker/0:2[83] exited with preempt_count 1

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-11-memxor@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent c1ff181f
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -172,6 +172,8 @@ int noinline bpf_fentry_test1(int a)
{
	return a + 1;
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);

int noinline bpf_fentry_test2(int a, u64 b)
{
+4 −0
Original line number Diff line number Diff line
@@ -118,6 +118,8 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
	.check_set = &bpf_testmod_check_kfunc_ids,
};

extern int bpf_fentry_test1(int a);

static int bpf_testmod_init(void)
{
	int ret;
@@ -125,6 +127,8 @@ static int bpf_testmod_init(void)
	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
	if (ret < 0)
		return ret;
	if (bpf_fentry_test1(0) < 0)
		return -EINVAL;
	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}

+1 −0
Original line number Diff line number Diff line
@@ -52,3 +52,4 @@ CONFIG_NETFILTER=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
CONFIG_NF_CONNTRACK=y
CONFIG_USERFAULTFD=y
+230 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>
#include <stdatomic.h>
#include <test_progs.h>
#include <sys/syscall.h>
#include <linux/module.h>
#include <linux/userfaultfd.h>

#include "ksym_race.skel.h"
#include "bpf_mod_race.skel.h"
#include "kfunc_call_race.skel.h"

/* This test crafts a race between btf_try_get_module and do_init_module, and
 * checks whether btf_try_get_module handles the invocation for a well-formed
 * but uninitialized module correctly. Unless the module has completed its
 * initcalls, the verifier should fail the program load and return ENXIO.
 *
 * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
 * sleep, then the BPF program is loaded and the return value from verifier is
 * inspected. After this, the userfaultfd is closed so that the module loading
 * thread makes forward progress, and fmod_ret injects an error so that the
 * module load fails and it is freed.
 *
 * If the verifier succeeded in loading the supplied program, it will end up
 * taking reference to freed module, and trigger a crash when the program fd
 * is closed later. This is true for both kfuncs and ksyms. In both cases,
 * the crash is triggered inside bpf_prog_free_deferred, when module reference
 * is finally released.
 */

/* Describes the BPF object that the verifier is expected to reject while
 * the module loading thread is held inside the fmod_ret program.
 */
struct test_config {
	/* Message used for the assertion on the open_and_load result */
	const char *str_open;
	/* Opens and loads the skeleton; the test expects a NULL return and
	 * inspects errno right after the call.
	 */
	void *(*bpf_open_and_load)(void);
	/* Destroys a skeleton that bpf_open_and_load unexpectedly returned */
	void (*bpf_destroy)(void *);
};

/* Outcome published by the module loading thread. */
enum test_state {
	/* Nothing reported yet; zero, so "!state" means "still pending" */
	_TS_INVALID = 0,
	/* load_module() returned 0 */
	TS_MODULE_LOAD = 1,
	/* load_module() returned non-zero */
	TS_MODULE_LOAD_FAIL = 2,
};

/* Stored by load_module_thread(), polled by the thread running the test */
static _Atomic enum test_state state = _TS_INVALID;

/* Raw finit_module system call: load a module from the open file @fd. */
static int sys_finit_module(int fd, const char *param_values, int flags)
{
	long rc = syscall(__NR_finit_module, fd, param_values, flags);

	return rc;
}

/* Raw delete_module system call: unload the module called @name. */
static int sys_delete_module(const char *name, unsigned int flags)
{
	long rc = syscall(__NR_delete_module, name, flags);

	return rc;
}

/* Load the kernel module stored in the file @mod using finit_module with
 * empty parameters and no flags.
 *
 * Returns 0 on success and a negative value if the file cannot be opened or
 * the module fails to load.
 */
static int load_module(const char *mod)
{
	int ret, fd;

	/* Honour the caller-supplied path instead of a hard-coded file name */
	fd = open(mod, O_RDONLY);
	if (fd < 0)
		return fd;

	ret = sys_finit_module(fd, "", 0);
	/* The descriptor is not needed once the load attempt has returned */
	close(fd);
	return ret < 0 ? ret : 0;
}

static void *load_module_thread(void *p)
{

	if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
		atomic_store(&state, TS_MODULE_LOAD);
	else
		atomic_store(&state, TS_MODULE_LOAD_FAIL);
	return p;
}

/* Raw userfaultfd system call; returns the new descriptor or -1. */
static int sys_userfaultfd(int flags)
{
	long fd = syscall(__NR_userfaultfd, flags);

	return fd;
}

/* Create a userfaultfd and register the 4096-byte range starting at
 * @fault_addr for missing-page faults.
 *
 * Returns the userfaultfd descriptor on success, or -errno of the failing
 * call. On failure the descriptor is closed before returning.
 */
static int test_setup_uffd(void *fault_addr)
{
	struct uffdio_register uffd_register = {};
	struct uffdio_api uffd_api = {};
	int uffd, err;

	uffd = sys_userfaultfd(O_CLOEXEC);
	if (uffd < 0)
		return -errno;

	/* The API handshake must precede any other userfaultfd ioctl */
	uffd_api.api = UFFD_API;
	uffd_api.features = 0;
	if (ioctl(uffd, UFFDIO_API, &uffd_api))
		goto err_close;

	uffd_register.range.start = (unsigned long)fault_addr;
	uffd_register.range.len = 4096;
	uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register))
		goto err_close;
	return uffd;

err_close:
	/* Capture errno before close(), which may overwrite it */
	err = -errno;
	close(uffd);
	return err;
}

/* Run one race scenario described by @config.
 *
 * Steps, in order:
 *  - map one anonymous read-only page to be used as the faulting address
 *  - unload bpf_testmod
 *  - open, configure, load and attach the bpf_mod_race skeleton
 *  - register the page with userfaultfd
 *  - start a thread that loads bpf_testmod and wait until the BPF side
 *    reports that it is about to block
 *  - try to load the object from @config and check it fails with ENXIO
 *  - close the userfaultfd, join the thread and check the module load failed
 *  - destroy the skeleton, reload bpf_testmod, unmap the page and reset 'state'
 */
static void test_bpf_mod_race_config(const struct test_config *config)
{
	void *fault_addr, *skel_fail;
	struct bpf_mod_race *skel;
	struct uffd_msg uffd_msg;
	pthread_t load_mod_thrd;
	_Atomic int *blockingp;
	int uffd, ret;

	/* The page is never touched from here, so it stays unpopulated */
	fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
		return;

	if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
		goto end_mmap;

	skel = bpf_mod_race__open();
	if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
		goto end_module;

	/* rodata has to be filled in between open and load */
	skel->rodata->bpf_mod_race_config.tgid = getpid();
	skel->rodata->bpf_mod_race_config.inject_error = -4242;
	skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
	if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
		goto end_destroy;
	/* Progress indicator written by the BPF programs (1, then 2) */
	blockingp = (_Atomic int *)&skel->bss->bpf_blocking;

	if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
		goto end_destroy;

	uffd = test_setup_uffd(fault_addr);
	if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
		goto end_destroy;

	if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
		       "load module thread"))
		goto end_uffd;

	/* Now, we either fail loading module, or block in bpf prog, spin to find out */
	while (!atomic_load(&state) && !atomic_load(blockingp))
		;
	/* A state other than _TS_INVALID means the thread already finished */
	if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
		goto end_join;
	if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
		/* NOTE(review): SIGKILL is not a per-thread signal, so this
		 * presumably terminates the whole test process - confirm intent.
		 */
		pthread_kill(load_mod_thrd, SIGKILL);
		goto end_uffd;
	}

	/* We might have set bpf_blocking to 1, but may have not blocked in
	 * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
	 */
	/* NOTE(review): on the two failure paths below uffd is still open
	 * when end_join joins the thread; if the thread is blocked on the
	 * fault, the join may not return - confirm.
	 */
	if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
		       "read uffd block event"))
		goto end_join;
	if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
		goto end_join;

	/* We know that load_mod_thrd is blocked in the fmod_ret program, the
	 * module state is still MODULE_STATE_COMING because mod->init hasn't
	 * returned. This is the time we try to load a program calling kfunc and
	 * check if we get ENXIO from verifier.
	 */
	skel_fail = config->bpf_open_and_load();
	/* Save errno right away, before any later call can overwrite it */
	ret = errno;
	if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
		/* Close uffd to unblock load_mod_thrd */
		close(uffd);
		uffd = -1;
		/* Wait for the fexit program on do_init_module to report 2 */
		while (atomic_load(blockingp) != 2)
			;
		ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
		config->bpf_destroy(skel_fail);
		goto end_join;

	}
	ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
	ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");

	/* Closing uffd lets the blocked module loading thread continue */
	close(uffd);
	uffd = -1;
end_join:
	pthread_join(load_mod_thrd, NULL);
	/* uffd < 0 only if it was closed above, i.e. the thread was unblocked */
	if (uffd < 0)
		ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
end_uffd:
	if (uffd >= 0)
		close(uffd);
end_destroy:
	bpf_mod_race__destroy(skel);
	ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
end_module:
	/* Drop any loaded copy, then load the module again for later tests */
	sys_delete_module("bpf_testmod", 0);
	ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
end_mmap:
	munmap(fault_addr, 4096);
	/* Reset the shared state so the next subtest starts from scratch */
	atomic_store(&state, _TS_INVALID);
}

/* Subtest configuration built on the ksym_race skeleton */
static const struct test_config ksym_config = {
	.bpf_open_and_load = (void *)ksym_race__open_and_load,
	.bpf_destroy = (void *)ksym_race__destroy,
	.str_open = "ksym_race__open_and_load",
};

/* Subtest configuration built on the kfunc_call_race skeleton */
static const struct test_config kfunc_config = {
	.bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
	.bpf_destroy = (void *)kfunc_call_race__destroy,
	.str_open = "kfunc_call_race__open_and_load",
};

/* Test entry point: runs the ksym subtest, then the kfunc subtest, each only
 * if test__start_subtest() selects it.
 */
void serial_test_bpf_mod_race(void)
{
	static const struct {
		const char *subtest;
		const struct test_config *config;
	} cases[] = {
		{ "ksym (used_btfs UAF)", &ksym_config },
		{ "kfunc (kfunc_btf_tab UAF)", &kfunc_config },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		if (test__start_subtest(cases[i].subtest))
			test_bpf_mod_race_config(cases[i].config);
	}
}
+100 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Read-only parameters; userspace fills them in before loading the object */
const volatile struct {
	/* tgid of the process whose calls activate the trace programs */
	pid_t tgid;
	/* value the fmod_ret program returns, i.e. the injected error */
	int inject_error;
	/* start address of the userfaultfd-monitored range */
	void *fault_addr;
} bpf_mod_race_config = { .tgid = -1 };

/* 0 initially, 1 once widen_race is about to block, 2 once do_init_module returned */
int bpf_blocking = 0;
/* -1 until btf_try_get_module is observed, then 1 if it returned a module, else 0 */
int res_try_get_module = -1;

/* Returns true when the current task's tgid matches the configured tgid */
static __always_inline bool check_thread_id(void)
{
	struct task_struct *cur;
	pid_t cur_tgid;

	cur = bpf_get_current_task_btf();
	cur_tgid = cur->tgid;
	return cur_tgid == bpf_mod_race_config.tgid;
}

/* The trace of execution is something like this:
 *
 * finit_module()
 *   load_module()
 *     prepare_coming_module()
 *       notifier_call(MODULE_STATE_COMING)
 *         btf_parse_module()
 *         btf_alloc_id()		// Visible to userspace at this point
 *         list_add(btf_mod->list, &btf_modules)
 *     do_init_module()
 *       freeinit = kmalloc()
 *       ret = mod->init()
 *         bpf_prog_widen_race()
 *           bpf_copy_from_user()
 *             ...<sleep>...
 *       if (ret < 0)
 *         ...
 *         free_module()
 * return ret
 *
 * At this point, module loading thread is blocked, we now load the program:
 *
 * bpf_check
 *   add_kfunc_call/check_pseudo_btf_id
 *     btf_try_get_module
 *       try_get_module_live == false
 *     return -ENXIO
 *
 * Without the fix (try_get_module_live in btf_try_get_module):
 *
 * bpf_check
 *   add_kfunc_call/check_pseudo_btf_id
 *     btf_try_get_module
 *       try_get_module == true
 *     <store module reference in btf_kfunc_tab or used_btf array>
 *   ...
 * return fd
 *
 * Now, if we inject an error in the blocked program, our module will be freed
 * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
 * Later, when bpf program is freed, it will try to module_put already freed
 * module. This is why try_get_module_live returns false if mod->state is not
 * MODULE_STATE_LIVE.
 */

/* Sleepable fmod_ret program on bpf_fentry_test1. For the configured process
 * it flags that it is about to block, reads one byte from the monitored
 * address, and then returns the configured error. Every other caller gets 0.
 */
SEC("fmod_ret.s/bpf_fentry_test1")
int BPF_PROG(widen_race, int a, int ret)
{
	char scratch;

	if (check_thread_id()) {
		/* Tell userspace that the blocking read is next */
		bpf_blocking = 1;
		bpf_copy_from_user(&scratch, sizeof(scratch), bpf_mod_race_config.fault_addr);
		return bpf_mod_race_config.inject_error;
	}
	return 0;
}

/* fexit program on do_init_module: for the configured process, record that
 * the blocking phase is over by setting bpf_blocking to 2.
 */
SEC("fexit/do_init_module")
int BPF_PROG(fexit_init_module, struct module *mod, int ret)
{
	if (check_thread_id())
		bpf_blocking = 2;
	return 0;
}

/* fexit program on btf_try_get_module: store 1 in res_try_get_module when
 * 'mod' is non-NULL and 0 otherwise. No tgid filtering is applied here.
 */
SEC("fexit/btf_try_get_module")
int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
{
	res_try_get_module = mod ? 1 : 0;
	return 0;
}

/* License string emitted into the "license" section of the object */
char _license[] SEC("license") = "GPL";
Loading