Commit 6fcd486b authored by Alexei Starovoitov, committed by Daniel Borkmann
Browse files

bpf: Refactor RCU enforcement in the verifier.



bpf_rcu_read_lock/unlock() are only available in clang compiled kernels. The
lack of such a key mechanism makes it impossible for sleepable bpf programs to
use RCU pointers.

Allow bpf_rcu_read_lock/unlock() in GCC compiled kernels (though GCC doesn't
support btf_type_tag yet) and allowlist certain field dereferences in important
data structures like task_struct, cgroup, socket that are used by sleepable
programs either as RCU pointers or as fully trusted pointers (which are valid
outside of an RCU CS). Use BTF_TYPE_SAFE_RCU and BTF_TYPE_SAFE_TRUSTED macros
for such tagging. They will be removed once GCC supports btf_type_tag.

With that, refactor check_ptr_to_btf_access(). Make it strict in enforcing
PTR_TRUSTED and PTR_UNTRUSTED while deprecating old PTR_TO_BTF_ID without
modifier flags. There is a chance that this strict enforcement might break
existing programs (especially on GCC compiled kernels), but this cleanup has to
start sooner rather than later. Note that PTR_TO_CTX access still yields the old
deprecated PTR_TO_BTF_ID. Once it's converted to strict PTR_TRUSTED or
PTR_UNTRUSTED, the kfuncs and helpers will be able to default to
KF_TRUSTED_ARGS. KF_RCU will remain as a weaker version of KF_TRUSTED_ARGS
where the obj refcnt could be 0.

Adjust rcu_read_lock selftest to run on gcc and clang compiled kernels.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20230303041446.3630-7-alexei.starovoitov@gmail.com
parent 0047d834
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2279,7 +2279,7 @@ struct bpf_core_ctx {

bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
				const struct bpf_reg_state *reg,
				int off);
				int off, const char *suffix);

bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
			       const struct btf *reg_btf, u32 reg_id,
+0 −1
Original line number Diff line number Diff line
@@ -537,7 +537,6 @@ struct bpf_verifier_env {
	bool bypass_spec_v1;
	bool bypass_spec_v4;
	bool seen_direct_write;
	bool rcu_tag_supported;
	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
	const struct bpf_line_info *prev_linfo;
	struct bpf_verifier_log log;
+13 −3
Original line number Diff line number Diff line
@@ -6163,6 +6163,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
	const char *tname, *mname, *tag_value;
	u32 vlen, elem_id, mid;

	*flag = 0;
again:
	tname = __btf_name_by_offset(btf, t->name_off);
	if (!btf_type_is_struct(t)) {
@@ -6329,6 +6330,15 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
		 * of this field or inside of this struct
		 */
		if (btf_type_is_struct(mtype)) {
			if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION &&
			    btf_type_vlen(mtype) != 1)
				/*
				 * walking unions yields untrusted pointers
				 * with exception of __bpf_md_ptr and other
				 * unions with a single member
				 */
				*flag |= PTR_UNTRUSTED;

			/* our field must be inside that union or struct */
			t = mtype;

@@ -6373,7 +6383,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
			stype = btf_type_skip_modifiers(btf, mtype->type, &id);
			if (btf_type_is_struct(stype)) {
				*next_btf_id = id;
				*flag = tmp_flag;
				*flag |= tmp_flag;
				return WALK_PTR;
			}
		}
@@ -8357,7 +8367,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,

bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
				const struct bpf_reg_state *reg,
				int off)
				int off, const char *suffix)
{
	struct btf *btf = reg->btf;
	const struct btf_type *walk_type, *safe_type;
@@ -8374,7 +8384,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,

	tname = btf_name_by_offset(btf, walk_type->name_off);

	ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
	ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
	if (ret < 0)
		return false;

+20 −20
Original line number Diff line number Diff line
@@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)

static const struct btf_kfunc_id_set cpumask_kfunc_set = {
+121 −57
Original line number Diff line number Diff line
@@ -5073,29 +5073,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
	return 0;
}
#define BTF_TYPE_SAFE_NESTED(__type)  __PASTE(__type, __safe_fields)
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
BTF_TYPE_SAFE_NESTED(struct task_struct) {
/*
 * Allow list few fields as RCU trusted or full trusted.
 * This logic doesn't allow mix tagging and will be removed once GCC supports
 * btf_type_tag.
 */
/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};
BTF_TYPE_SAFE_NESTED(struct css_set) {
BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};
static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	__bpf_md_ptr(struct seq_file *, seq);
};
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};
BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};
BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};
BTF_TYPE_SAFE_TRUSTED(struct dentry) {
	/* no negative dentry-s in places where bpf can see it */
	struct inode *d_inode;
};
BTF_TYPE_SAFE_TRUSTED(struct socket) {
	struct sock *sk;
};
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			int off)
{
	/* If its parent is not trusted, it can't regain its trusted status. */
	if (!is_trusted_reg(reg))
		return false;
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct css_set));
	return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
}
static bool type_is_trusted(struct bpf_verifier_env *env,
			    struct bpf_reg_state *reg,
			    int off)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
	return btf_nested_type_is_trusted(&env->log, reg, off);
	return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
}
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
@@ -5181,49 +5228,58 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
	if (ret < 0)
		return ret;
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */
	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
	if (type_flag(reg->type) & PTR_UNTRUSTED)
		flag |= PTR_UNTRUSTED;
		flag = PTR_UNTRUSTED;
	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
	 * marked as inheriting its parent's state of trust.
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
	 * An RCU-protected pointer can also be deemed trusted if we are in an
	 * RCU read region. This case is handled below.
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
	if (nested_ptr_is_trusted(env, reg, off)) {
		if (type_is_trusted(env, reg, off)) {
			flag |= PTR_TRUSTED;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, off)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU) {
				/* __rcu tagged pointers can be NULL */
				flag |= PTR_MAYBE_NULL;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields untrusted pointer */
				flag = PTR_UNTRUSTED;
			}
		} else {
			/*
		 * task->cgroups is trusted. It provides a stronger guarantee
		 * than __rcu tag on 'cgroups' field in 'struct task_struct'.
		 * Clear MEM_RCU in such case.
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
		flag &= ~MEM_RCU;
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		flag &= ~PTR_TRUSTED;
	}
	if (flag & MEM_RCU) {
		/* Mark value register as MEM_RCU only if it is protected by
		 * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
		 * itself can already indicate trustedness inside the rcu
		 * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
		 * it could be null in some cases.
		 */
		if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg)))
			flag |= PTR_MAYBE_NULL;
		else
			flag &= ~MEM_RCU;
	} else if (reg->type & MEM_RCU) {
		/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
		 * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
		 */
		flag |= PTR_UNTRUSTED;
	}
	if (atype == BPF_READ && value_regno >= 0)
		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
@@ -10049,10 +10105,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
	if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
		verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
		return -EACCES;
	}
	if (env->cur_state->active_rcu_lock) {
		struct bpf_func_state *state;
@@ -14911,9 +14963,23 @@ static int do_check(struct bpf_verifier_env *env)
				 * src_reg == stack|map in some other branch.
				 * Reject it.
				 */
				verbose(env, "same insn cannot be used with different pointers\n");
				if (base_type(src_reg_type) == PTR_TO_BTF_ID &&
				    base_type(*prev_src_type) == PTR_TO_BTF_ID) {
					/*
					 * Have to support a use case when one path through
					 * the program yields TRUSTED pointer while another
					 * is UNTRUSTED. Fallback to UNTRUSTED to generate
					 * BPF_PROBE_MEM.
					 */
					*prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
				} else {
					verbose(env,
						"The same insn cannot be used with different pointers: %s",
						reg_type_str(env, src_reg_type));
					verbose(env, " != %s\n", reg_type_str(env, *prev_src_type));
					return -EINVAL;
				}
			}
		} else if (class == BPF_STX) {
			enum bpf_reg_type *prev_dst_type, dst_reg_type;
@@ -17984,8 +18050,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
	env->bypass_spec_v1 = bpf_bypass_spec_v1();
	env->bypass_spec_v4 = bpf_bypass_spec_v4();
	env->bpf_capable = bpf_capable();
	env->rcu_tag_supported = btf_vmlinux &&
		btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
Loading