Commit 6099754a authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf: Add bpf_rcu_read_lock() support'



Yonghong Song says:

====================

Currently, without rcu attribute info in BTF, the verifier treats
rcu tagged pointer as a normal pointer. This might be a problem
for sleepable program where rcu_read_lock()/unlock() is not available.
For example, for a sleepable fentry program, if rcu protected memory
access is interleaved with a sleepable helper/kfunc, it is possible
the memory access after the sleepable helper/kfunc might be invalid
since the object might have been freed then. Even without
a sleepable helper/kfunc, without rcu_read_lock() protection,
it is possible that the rcu protected object might be release
in the middle of bpf program execution which may cause incorrect
result.

To prevent above cases, enable btf_type_tag("rcu") attributes,
introduce new bpf_rcu_read_lock/unlock() kfuncs and add verifier support.

In the rest of patch set, Patch 1 enabled btf_type_tag for __rcu
attribute. Patche 2 added might_sleep in bpf_func_proto. Patch 3 added new
bpf_rcu_read_lock/unlock() kfuncs and verifier support.
Patch 4 added some tests for these two new kfuncs.

Changelogs:
  v9 -> v10:
    . if no rcu tag support in vmlinux btf, using bpf_rcu_read_lock/unlock()
      will cause verification error.
    . at bpf_rcu_read_unlock(), invalidate rcu ptr to PTR_UNTRUSTED
      instead of SCALAR_VALUE.
    . a few other comment changes and other minor changes.
  v8 -> v9:
    . remove sleepable prog check for ld_abs/ind checking in rcu read
      lock region.
    . fix a test failure with gcc-compiled kernel.
    . a couple of other minor fixes.
  v7 -> v8:
    . add might_sleep in bpf_func_proto so we can easily identify whether
      a helper is sleepable or not.
    . do not enforce rcu rules for sleepable, e.g., rcu dereference must
      be in a bpf_rcu_read_lock region. This is to keep old code working
      fine.
    . Mark 'b' in 'b = a->b' (b is tagged with __rcu) as MEM_RCU only if
      'b = a->b' in rcu read region and 'a' is trusted. This adds safety
      guarantee for 'b' inside the rcu read region.
  v6 -> v7:
    . rebase on top of bpf-next.
    . remove the patch which enables sleepable program using
      cgrp_local_storage map. This is orthogonal to this patch set
      and will be addressed separately.
    . mark the rcu pointer dereference result as UNTRUSTED if inside
      a bpf_rcu_read_lock() region.
  v5 -> v6:
    . fix selftest prog miss_unlock which tested nested locking.
    . add comments in selftest prog cgrp_succ to explain how to handle
      nested memory access after rcu memory load.
  v4 -> v5:
    . add new test to aarch64 deny list.
  v3 -> v4:
    . fix selftest failures when built with gcc. gcc doesn't support
      btf_type_tag yet and some tests relies on that. skip these
      tests if vmlinux BTF does not have btf_type_tag("rcu").
  v2 -> v3:
    . went back to MEM_RCU approach with invalidate rcu ptr registers
      at bpf_rcu_read_unlock() place.
    . remove KF_RCU_LOCK/UNLOCK flag and compare btf_id at verification
      time instead.
  v1 -> v2:
    . use kfunc instead of helper for bpf_rcu_read_lock/unlock.
    . not use MEM_RCU bpf_type_flag, instead use active_rcu_lock
      in reg state to identify rcu ptr's.
    . Add more self tests.
    . add new test to s390x deny list.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents f471748b 48671232
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -572,6 +572,9 @@ enum bpf_type_flag {
	 */
	PTR_TRUSTED		= BIT(12 + BPF_BASE_TYPE_BITS),

	/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
	MEM_RCU			= BIT(13 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_FLAG_MAX,
	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
};
@@ -682,6 +685,7 @@ struct bpf_func_proto {
	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
	bool gpl_only;
	bool pkt_access;
	bool might_sleep;
	enum bpf_return_type ret_type;
	union {
		struct {
+4 −1
Original line number Diff line number Diff line
@@ -344,6 +344,7 @@ struct bpf_verifier_state {
		u32 id;
	} active_lock;
	bool speculative;
	bool active_rcu_lock;

	/* first and last insn idx of this verifier state */
	u32 first_insn_idx;
@@ -445,6 +446,7 @@ struct bpf_insn_aux_data {
	u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
	bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
	bool zext_dst; /* this insn zero extends dst reg */
	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
	u8 alu_state; /* used in combination with alu_limit */

	/* below fields are initialized once */
@@ -534,6 +536,7 @@ struct bpf_verifier_env {
	bool bypass_spec_v1;
	bool bypass_spec_v4;
	bool seen_direct_write;
	bool rcu_tag_supported;
	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
	const struct bpf_line_info *prev_linfo;
	struct bpf_verifier_log log;
@@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
	}
}

#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)

static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{
+2 −1
Original line number Diff line number Diff line
@@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
# endif
# define __iomem
# define __percpu	BTF_TYPE_TAG(percpu)
# define __rcu
# define __rcu		BTF_TYPE_TAG(rcu)

# define __chk_user_ptr(x)	(void)0
# define __chk_io_ptr(x)	(void)0
/* context/locking */
+4 −2
Original line number Diff line number Diff line
@@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
	.func		= bpf_ima_inode_hash,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_ima_inode_hash_btf_ids[0],
@@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
	.func		= bpf_ima_file_hash,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_ima_file_hash_btf_ids[0],
@@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	case BPF_FUNC_bprm_opts_set:
		return &bpf_bprm_opts_set_proto;
	case BPF_FUNC_ima_inode_hash:
		return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
		return &bpf_ima_inode_hash_proto;
	case BPF_FUNC_ima_file_hash:
		return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
		return &bpf_ima_file_hash_proto;
	case BPF_FUNC_get_attach_cookie:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
#ifdef CONFIG_NET
+3 −0
Original line number Diff line number Diff line
@@ -6238,6 +6238,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
				/* check __percpu tag */
				if (strcmp(tag_value, "percpu") == 0)
					tmp_flag = MEM_PERCPU;
				/* check __rcu tag */
				if (strcmp(tag_value, "rcu") == 0)
					tmp_flag = MEM_RCU;
			}

			stype = btf_type_skip_modifiers(btf, mtype->type, &id);
Loading