Commit 7f16d2aa authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Introduce composable bpf types'

Hao Luo says:

====================
This patch set consists of two changes:

 - a cleanup of arg_type, ret_type and reg_type which try to make those
   types composable. (patch 1/9 - patch 6/9)
 - a bug fix that prevents bpf programs from writing kernel memory.
   (patch 7/9 - patch 9/9)

The purpose of the cleanup is to find a scalable way to express type
nullness and read-onliness. This patchset introduces two flags that
can be applied on all three types: PTR_MAYBE_NULL and MEM_RDONLY.
Previous types such as ARG_XXX_OR_NULL can now be written as

 ARG_XXX | PTR_MAYBE_NULL

Similarly, PTR_TO_RDONLY_BUF is now "PTR_TO_BUF | MEM_RDONLY".

Flags can be composed, as ARGs can be both MEM_RDONLY and MAYBE_NULL.

 ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY

Based on this new composable types, patch 7/9 applies MEM_RDONLY on
PTR_TO_MEM, in order to tag the returned memory from per_cpu_ptr as
read-only. Therefore fixing a previous bug that one can leverage
per_cpu_ptr to modify kernel memory within BPF programs.

Patch 8/9 generalizes the use of MEM_RDONLY further by tagging a set of
helper arguments ARG_PTR_TO_MEM with MEM_RDONLY. Some helper functions
may override their arguments, such as bpf_d_path, bpf_snprintf. In this
patch, we narrow the ARG_PTR_TO_MEM to be compatible with only a subset
of memory types. This prevents these helpers from writing read-only
memories. For the helpers that do not write its arguments, we add tag
MEM_RDONLY to allow taking a RDONLY memory as argument.

Changes since v1:
 - use %u to print base_type(type) instead of %lu. (Andrii, patch 3/9)
 - improve reg_type_str() by appending '_or_null' and prepending 'rdonly_'.
   use preallocated buffer in 'bpf_env'.
 - unified handling of the previous XXX_OR_NULL in adjust_ptr_min_max_vals
   (Andrii, patch 4/9)
 - move PTR_TO_MAP_KEY up to PTR_TO_MAP_VALUE so that we don't have
   to change to drivers that assume the numeric values of bpf_reg.
   (patch 4/9)
 - reintroduce the typo from previous commits in fixes tags (Andrii, patch 7/9)
 - extensive comments on the reason behind folding flags in
   check_reg_type (Andrii, patch 8/9)

Changes since RFC v2:
 - renamed BPF_BASE_TYPE to a more succinct name base_type and move its
   definition to bpf_verifier.h. Same for BPF_TYPE_FLAG. (Alexei)
 - made checking MEM_RDONLY in check_reg_type() universal (Alexei)
 - ran through majority of test_progs and fixed bugs in RFC v2:
   - fixed incorrect BPF_BASE_TYPE_MASK. The high bit of GENMASK should
     be BITS - 1, rather than BITS. patch 1/9.
   - fixed incorrect conditions when checking ARG_PTR_TO_MAP_VALUE in
     check_func_arg(). See patch 2/9.
   - fixed a bug where PTR_TO_BTF_ID may be combined with MEM_RDONLY,
     causing the check in check_mem_access() to fall through to the
     'else' branch. See check_helper_call() in patch 7/9.
 - fixed build failure on netronome driver. Entries in bpf_reg_type have
   been ordered. patch 4/9.
 - fixed build warnings of using '%d' to print base_type. patch 4/9
 - unify arg_type_may_be_null() and reg_type_may_be_null() into a single
   type_may_be_null().

Previous versions:

 v1:
   https://lwn.net/Articles/877938/

 RFC v2:
   https://lwn.net/Articles/877171/

 RFC v1:
   https://lore.kernel.org/bpf/20211109003052.3499225-1-haoluo@google.com/T/
   https://lore.kernel.org/bpf/20211109021624.1140446-8-haoluo@google.com/T/


====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e967a20a 9497c458
Loading
Loading
Loading
Loading
+77 −24
Original line number Diff line number Diff line
@@ -297,6 +297,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,

extern const struct bpf_map_ops bpf_map_offload_ops;

/* bpf_type_flag contains a set of flags that are applicable to the values of
 * arg_type, ret_type and reg_type. For example, a pointer value may be null,
 * or a memory is read-only. We classify types into two categories: base types
 * and extended types. Extended types are base types combined with a type flag.
 *
 * Currently there are no more than 32 base types in arg_type, ret_type and
 * reg_types.
 */
#define BPF_BASE_TYPE_BITS	8

enum bpf_type_flag {
	/* PTR may be NULL. */
	PTR_MAYBE_NULL		= BIT(0 + BPF_BASE_TYPE_BITS),

	/* MEM is read-only. When applied on bpf_arg, it indicates the arg is
	 * compatible with both mutable and immutable memory.
	 */
	MEM_RDONLY		= BIT(1 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_LAST_FLAG	= MEM_RDONLY,
};

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)

/* Max number of all types. */
#define BPF_TYPE_LIMIT		(__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))

/* function argument constraints */
enum bpf_arg_type {
	ARG_DONTCARE = 0,	/* unused argument in helper function */
@@ -308,13 +336,11 @@ enum bpf_arg_type {
	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
	ARG_PTR_TO_MAP_VALUE_OR_NULL,	/* pointer to stack used as map value or NULL */

	/* the following constraints used to prototype bpf_memcmp() and other
	 * functions that access data on eBPF program stack
	 */
	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
	ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
				 * helper function must fill all bytes or clear
				 * them in error case.
@@ -324,42 +350,65 @@ enum bpf_arg_type {
	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */

	ARG_PTR_TO_CTX,		/* pointer to context */
	ARG_PTR_TO_CTX_OR_NULL,	/* pointer to context or NULL */
	ARG_ANYTHING,		/* any (initialized) argument is ok */
	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
	ARG_PTR_TO_INT,		/* pointer to int */
	ARG_PTR_TO_LONG,	/* pointer to long */
	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
	ARG_PTR_TO_SOCKET_OR_NULL,	/* pointer to bpf_sock (fullsock) or NULL */
	ARG_PTR_TO_BTF_ID,	/* pointer to in-kernel struct */
	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
	ARG_PTR_TO_STACK,	/* pointer to stack */
	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
	__BPF_ARG_TYPE_MAX,

	/* Extended arg_types. */
	ARG_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
	ARG_PTR_TO_MEM_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
	ARG_PTR_TO_CTX_OR_NULL		= PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
	ARG_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
	ARG_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
	ARG_PTR_TO_STACK_OR_NULL	= PTR_MAYBE_NULL | ARG_PTR_TO_STACK,

	/* This must be the last entry. Its purpose is to ensure the enum is
	 * wide enough to hold the higher bits reserved for bpf_type_flag.
	 */
	__BPF_ARG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
};
static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);

/* type of values returned from helper functions */
enum bpf_return_type {
	RET_INTEGER,			/* function returns integer */
	RET_VOID,			/* function doesn't return anything */
	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
	RET_PTR_TO_SOCKET,		/* returns a pointer to a socket */
	RET_PTR_TO_TCP_SOCK,		/* returns a pointer to a tcp_sock */
	RET_PTR_TO_SOCK_COMMON,		/* returns a pointer to a sock_common */
	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
	__BPF_RET_TYPE_MAX,

	/* Extended ret_types. */
	RET_PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
	RET_PTR_TO_SOCKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
	RET_PTR_TO_TCP_SOCK_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,

	/* This must be the last entry. Its purpose is to ensure the enum is
	 * wide enough to hold the higher bits reserved for bpf_type_flag.
	 */
	__BPF_RET_TYPE_LIMIT	= BPF_TYPE_LIMIT,
};
static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
 * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
@@ -421,18 +470,15 @@ enum bpf_reg_type {
	PTR_TO_CTX,		 /* reg points to bpf_context */
	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
	PTR_TO_STACK,		 /* reg == frame_pointer + offset */
	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
	PTR_TO_PACKET,		 /* reg points to skb->data */
	PTR_TO_PACKET_END,	 /* skb->data + headlen */
	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
	/* PTR_TO_BTF_ID points to a kernel struct that does not need
@@ -450,18 +496,25 @@ enum bpf_reg_type {
	 * been checked for null. Used primarily to inform the verifier
	 * an explicit null check is required for this struct.
	 */
	PTR_TO_BTF_ID_OR_NULL,
	PTR_TO_MEM,		 /* reg points to valid memory region */
	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
	PTR_TO_RDONLY_BUF,	 /* reg points to a readonly buffer */
	PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
	PTR_TO_BUF,		 /* reg points to a read/write buffer */
	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
	PTR_TO_FUNC,		 /* reg points to a bpf program function */
	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
	__BPF_REG_TYPE_MAX,

	/* Extended reg_types. */
	PTR_TO_MAP_VALUE_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
	PTR_TO_SOCKET_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_SOCKET,
	PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
	PTR_TO_TCP_SOCK_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
	PTR_TO_BTF_ID_OR_NULL		= PTR_MAYBE_NULL | PTR_TO_BTF_ID,

	/* This must be the last entry. Its purpose is to ensure the enum is
	 * wide enough to hold the higher bits reserved for bpf_type_flag.
	 */
	__BPF_REG_TYPE_LIMIT	= BPF_TYPE_LIMIT,
};
static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);

/* The information passed from prog-specific *_is_valid_access
 * back to the verifier.
+17 −0
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@
 * that converting umax_value to int cannot overflow.
 */
#define BPF_MAX_VAR_SIZ	(1 << 29)
/* size of type_str_buf in bpf_verifier. */
#define TYPE_STR_BUF_LEN 64

/* Liveness marks, used for registers and spilled-regs (in stack slots).
 * Read marks propagate upwards until they find a write mark; they record that
@@ -484,6 +486,8 @@ struct bpf_verifier_env {
	/* Same as scratched_regs but for stack slots */
	u64 scratched_stack_slots;
	u32 prev_log_len, prev_insn_print_len;
	/* buffer used in reg_type_str() to generate reg_type string */
	char type_str_buf[TYPE_STR_BUF_LEN];
};

__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
@@ -546,5 +550,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
			    struct bpf_attach_target_info *tgt_info);
void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);

#define BPF_BASE_TYPE_MASK	GENMASK(BPF_BASE_TYPE_BITS - 1, 0)

/* extract base type from bpf_{arg, return, reg}_type. */
static inline u32 base_type(u32 type)
{
	return type & BPF_BASE_TYPE_MASK;
}

/* extract flags from an extended type. See bpf_type_flag in bpf.h. */
static inline u32 type_flag(u32 type)
{
	return type & ~BPF_BASE_TYPE_MASK;
}

#endif /* _LINUX_BPF_VERIFIER_H */
+7 −5
Original line number Diff line number Diff line
@@ -4940,10 +4940,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
	/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
		u32 type, flag;

		if (ctx_arg_info->offset == off &&
		    (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
		     ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
		type = base_type(ctx_arg_info->reg_type);
		flag = type_flag(ctx_arg_info->reg_type);
		if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
		    (flag & PTR_MAYBE_NULL)) {
			info->reg_type = ctx_arg_info->reg_type;
			return true;
		}
@@ -5857,7 +5859,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
				return -EINVAL;
			}

			reg->type = PTR_TO_MEM_OR_NULL;
			reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
			reg->id = ++env->id_gen;

			continue;
@@ -6351,7 +6353,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
	.func		= bpf_btf_find_by_name_kind,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
+1 −1
Original line number Diff line number Diff line
@@ -1789,7 +1789,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

+6 −6
Original line number Diff line number Diff line
@@ -531,7 +531,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
@@ -559,7 +559,7 @@ const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
@@ -645,7 +645,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_CONST_MAP_PTR,
	.arg3_type      = ARG_ANYTHING,
	.arg4_type      = ARG_PTR_TO_MEM,
	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};

@@ -682,7 +682,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};
@@ -695,7 +695,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

@@ -1026,7 +1026,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

Loading