Commit 2900005e authored by Yan Zhu's avatar Yan Zhu Committed by Daniel Borkmann
Browse files

bpf: Move BPF sysctls from kernel/sysctl.c to BPF core



We're moving sysctls out of kernel/sysctl.c as it is a mess. We
already moved all filesystem sysctls out. And with time the goal
is to move all sysctls out to their own subsystem/actual user.

kernel/sysctl.c has grown to an insane mess and its easy to run
into conflicts with it. The effort to move them out into various
subsystems is part of this.

Signed-off-by: default avatarYan Zhu <zhuyan34@huawei.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/bpf/20220407070759.29506-1-zhuyan34@huawei.com
parent 31231092
Loading
Loading
Loading
Loading
+87 −0
Original line number Diff line number Diff line
@@ -4908,3 +4908,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	unpriv_ebpf_notify(unpriv_enable);

	return ret;
}

static struct ctl_table bpf_syscall_table[] = {
	{
		.procname	= "unprivileged_bpf_disabled",
		.data		= &sysctl_unprivileged_bpf_disabled,
		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
		.mode		= 0644,
		.proc_handler	= bpf_unpriv_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_stats_enabled",
		.data		= &bpf_stats_enabled_key.key,
		.maxlen		= sizeof(bpf_stats_enabled_key),
		.mode		= 0644,
		.proc_handler	= bpf_stats_handler,
	},
	{ }
};

static int __init bpf_syscall_sysctl_init(void)
{
	register_sysctl_init("kernel", bpf_syscall_table);
	return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
+0 −79
Original line number Diff line number Diff line
@@ -62,7 +62,6 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/latencytop.h>
@@ -148,66 +147,6 @@ static const int max_extfrag_threshold = 1000;

#endif /* CONFIG_SYSCTL */

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	static int saved_val;
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && val != saved_val) {
		if (val)
			static_key_slow_inc(key);
		else
			static_key_slow_dec(key);
		saved_val = val;
	}
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	unpriv_ebpf_notify(unpriv_enable);

	return ret;
}
#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */

/*
 * /proc/sys support
 */
@@ -2299,24 +2238,6 @@ static struct ctl_table kern_table[] = {
		.extra2		= SYSCTL_ONE,
	},
#endif
#ifdef CONFIG_BPF_SYSCALL
	{
		.procname	= "unprivileged_bpf_disabled",
		.data		= &sysctl_unprivileged_bpf_disabled,
		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
		.mode		= 0644,
		.proc_handler	= bpf_unpriv_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_stats_enabled",
		.data		= &bpf_stats_enabled_key.key,
		.maxlen		= sizeof(bpf_stats_enabled_key),
		.mode		= 0644,
		.proc_handler	= bpf_stats_handler,
	},
#endif
#if defined(CONFIG_TREE_RCU)
	{
		.procname	= "panic_on_rcu_stall",