Commit c344b9fc authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf: add __percpu tagging in vmlinux BTF'

Hao Luo says:

====================

This patchset is very much similar to Yonghong's patchset on adding
__user tagging [1], where a "user" btf_type_tag was introduced to
describe __user memory pointers. Similar approach can be applied on
__percpu pointers. The __percpu attribute in kernel is used to identify
pointers that point to memory allocated in percpu region. Normally,
accessing __percpu memory requires using special functions like
per_cpu_ptr() etc. Directly accessing __percpu pointer is meaningless.

Currently vmlinux BTF does not have a way to differentiate a __percpu
pointer from a regular pointer. So BPF programs are allowed to load
__percpu memory directly, which is an incorrect behavior.

With the previous work that encodes __user information in BTF, a nice
framework has been set up to allow us to encode __percpu information in
BTF and let the verifier to reject programs that try to directly access
percpu pointer. Previously, there is a PTR_TO_PERCPU_BTF_ID reg type which
is used to represent those percpu static variables in the kernel. Pahole
is able to collect variables that are stored in ".data..percpu" section
in the kernel image and emit BTF information for those variables. The
bpf_per_cpu_ptr() and bpf_this_cpu_ptr() helper functions were added to
access these variables. Now with __percpu information, we can tag those
__percpu fields in a struct (such as cgroup->rstat_cpu) and allow the
pair of bpf percpu helpers to access them as well.

In addition to adding __percpu tagging, this patchset also fixes a
harmless bug in the previous patch that introduced __user. Patch 01/04
is for that. Patch 02/04 adds the new attribute "percpu". Patch 03/04
adds MEM_PERCPU tag for PTR_TO_BTF_ID and replaces PTR_TO_PERCPU_BTF_ID
with (BTF_ID | MEM_PERCPU). Patch 04/04 refactors the btf_tag test a bit
and adds tests for percpu tag.

Like [1], the minimal requirements for btf_type_tag is
clang (>= clang14) and pahole (>= 1.23).

[1] https://lore.kernel.org/bpf/20211220015110.3rqxk5qwub3pa2gh@ast-mbp.dhcp.thefacebook.com/t/


====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 401af75c 50c6b8a9
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -334,7 +334,15 @@ enum bpf_type_flag {
	/* MEM is in user address space. */
	MEM_USER		= BIT(3 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_LAST_FLAG	= MEM_USER,
	/* MEM is a percpu memory. MEM_PERCPU tags PTR_TO_BTF_ID. When tagged
	 * with MEM_PERCPU, PTR_TO_BTF_ID _cannot_ be directly accessed. In
	 * order to drop this tag, it must be passed into bpf_per_cpu_ptr()
	 * or bpf_this_cpu_ptr(), which will return the pointer corresponding
	 * to the specified cpu.
	 */
	MEM_PERCPU		= BIT(4 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_LAST_FLAG	= MEM_PERCPU,
};

/* Max number of base types. */
@@ -516,7 +524,6 @@ enum bpf_reg_type {
	 */
	PTR_TO_MEM,		 /* reg points to valid memory region */
	PTR_TO_BUF,		 /* reg points to a read/write buffer */
	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
	PTR_TO_FUNC,		 /* reg points to a bpf program function */
	__BPF_REG_TYPE_MAX,

+6 −1
Original line number Diff line number Diff line
@@ -38,7 +38,12 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
#  define __user
# endif
# define __iomem
# if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \
	__has_attribute(btf_type_tag)
#  define __percpu	__attribute__((btf_type_tag("percpu")))
# else
#  define __percpu
# endif
# define __rcu
# define __chk_user_ptr(x)	(void)0
# define __chk_io_ptr(x)	(void)0
+7 −1
Original line number Diff line number Diff line
@@ -5057,6 +5057,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		tag_value = __btf_name_by_offset(btf, t->name_off);
		if (strcmp(tag_value, "user") == 0)
			info->reg_type |= MEM_USER;
		if (strcmp(tag_value, "percpu") == 0)
			info->reg_type |= MEM_PERCPU;
	}

	/* skip modifiers */
@@ -5285,12 +5287,16 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
				return -EACCES;
			}

			/* check __user tag */
			/* check type tag */
			t = btf_type_by_id(btf, mtype->type);
			if (btf_type_is_type_tag(t)) {
				tag_value = __btf_name_by_offset(btf, t->name_off);
				/* check __user tag */
				if (strcmp(tag_value, "user") == 0)
					tmp_flag = MEM_USER;
				/* check __percpu tag */
				if (strcmp(tag_value, "percpu") == 0)
					tmp_flag = MEM_PERCPU;
			}

			stype = btf_type_skip_modifiers(btf, mtype->type, &id);
+16 −11
Original line number Diff line number Diff line
@@ -554,7 +554,6 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
		[PTR_TO_TP_BUFFER]	= "tp_buffer",
		[PTR_TO_XDP_SOCK]	= "xdp_sock",
		[PTR_TO_BTF_ID]		= "ptr_",
		[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
		[PTR_TO_MEM]		= "mem",
		[PTR_TO_BUF]		= "buf",
		[PTR_TO_FUNC]		= "func",
@@ -562,8 +561,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
	};

	if (type & PTR_MAYBE_NULL) {
		if (base_type(type) == PTR_TO_BTF_ID ||
		    base_type(type) == PTR_TO_PERCPU_BTF_ID)
		if (base_type(type) == PTR_TO_BTF_ID)
			strncpy(postfix, "or_null_", 16);
		else
			strncpy(postfix, "_or_null", 16);
@@ -575,6 +573,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
		strncpy(prefix, "alloc_", 32);
	if (type & MEM_USER)
		strncpy(prefix, "user_", 32);
	if (type & MEM_PERCPU)
		strncpy(prefix, "percpu_", 32);

	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
		 prefix, str[base_type(type)], postfix);
@@ -697,8 +697,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
			const char *sep = "";

			verbose(env, "%s", reg_type_str(env, t));
			if (base_type(t) == PTR_TO_BTF_ID ||
			    base_type(t) == PTR_TO_PERCPU_BTF_ID)
			if (base_type(t) == PTR_TO_BTF_ID)
				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
			verbose(env, "(");
/*
@@ -2783,7 +2782,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BUF:
	case PTR_TO_PERCPU_BTF_ID:
	case PTR_TO_MEM:
	case PTR_TO_FUNC:
	case PTR_TO_MAP_KEY:
@@ -4203,6 +4201,13 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"R%d is ptr_%s access percpu memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (env->ops->btf_struct_access) {
		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
						  off, size, atype, &btf_id, &flag);
@@ -4562,7 +4567,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
		err = check_tp_buffer_access(env, reg, regno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_BTF_ID) {
	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
		   !type_may_be_null(reg->type)) {
		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
					      value_regno);
	} else if (reg->type == CONST_PTR_TO_MAP) {
@@ -4808,7 +4814,7 @@ static int check_stack_range_initialized(
		}

		if (is_spilled_reg(&state->stack[spi]) &&
		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
		    base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
			goto mark;

		if (is_spilled_reg(&state->stack[spi]) &&
@@ -5264,7 +5270,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | ME
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -9676,7 +9682,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
			dst_reg->mem_size = aux->btf_var.mem_size;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_PERCPU_BTF_ID:
			dst_reg->btf = aux->btf_var.btf;
			dst_reg->btf_id = aux->btf_var.btf_id;
			break;
@@ -11876,7 +11881,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
	type = t->type;
	t = btf_type_skip_modifiers(btf, type, NULL);
	if (percpu) {
		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
		aux->btf_var.btf = btf;
		aux->btf_var.btf_id = type;
	} else if (!btf_type_is_struct(t)) {
+14 −0
Original line number Diff line number Diff line
@@ -33,6 +33,10 @@ struct bpf_testmod_btf_type_tag_2 {
	struct bpf_testmod_btf_type_tag_1 __user *p;
};

struct bpf_testmod_btf_type_tag_3 {
	struct bpf_testmod_btf_type_tag_1 __percpu *p;
};

noinline int
bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
	BTF_TYPE_EMIT(func_proto_typedef);
@@ -46,6 +50,16 @@ bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
	return arg->p->a;
}

noinline int
bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) {
	return arg->a;
}

noinline int
bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) {
	return arg->p->a;
}

noinline int bpf_testmod_loop_test(int n)
{
	int i, sum = 0;
Loading