Commit 7fb4d48d authored by Andrii Nakryiko's avatar Andrii Nakryiko
Browse files

Merge branch 'bpf: CO-RE support in the kernel'



Alexei Starovoitov says:

====================

From: Alexei Starovoitov <ast@kernel.org>

v4->v5:
. Reduce number of memory allocations in candidate cache logic
. Fix couple UAF issues
. Add Andrii's patch to cleanup struct bpf_core_cand
. More thorough tests
. Planned followups:
  - support -v in lskel
  - move struct bpf_core_spec out of bpf_core_apply_relo_insn to
    reduce stack usage
  - implement bpf_core_types_are_compat

v3->v4:
. complete refactor of find candidates logic.
  Now it has small permanent cache.
. Fix a bug in gen_loader related to attach_kind.
. Fix BTF log size limit.
. More tests.

v2->v3:
. addressed Andrii's feedback in every patch.
  New field in union bpf_attr changed from "core_relo" to "core_relos".
. added one more test and checkpatch.pl-ed the set.

v1->v2:
. Refactor uapi to pass 'struct bpf_core_relo' from LLVM into libbpf and further
into the kernel instead of bpf_core_apply_relo() bpf helper. Because of this
change the CO-RE algorithm has an ability to log error and debug events through
the standard bpf verifer log mechanism which was not possible with helper
approach.
. #define RELO_CORE macro was removed and replaced with btf_member_bit_offset() patch.

This set introduces CO-RE support in the kernel.
There are several reasons to add such support:
1. It's a step toward signed BPF programs.
2. It allows golang like languages that struggle to adopt libbpf
   to take advantage of CO-RE powers.
3. Currently the field accessed by 'ldx [R1 + 10]' insn is recognized
   by the verifier purely based on +10 offset. If R1 points to a union
   the verifier picks one of the fields at this offset.
   With CO-RE the kernel can disambiguate the field access.

Alexei Starovoitov (16):
  libbpf: Replace btf__type_by_id() with btf_type_by_id().
  bpf: Rename btf_member accessors.
  bpf: Prepare relo_core.c for kernel duty.
  bpf: Define enum bpf_core_relo_kind as uapi.
  bpf: Pass a set of bpf_core_relo-s to prog_load command.
  bpf: Adjust BTF log size limit.
  bpf: Add bpf_core_add_cands() and wire it into
    bpf_core_apply_relo_insn().
  libbpf: Use CO-RE in the kernel in light skeleton.
  libbpf: Support init of inner maps in light skeleton.
  libbpf: Clean gen_loader's attach kind.
  selftests/bpf: Add lskel version of kfunc test.
  selftests/bpf: Improve inner_map test coverage.
  selftests/bpf: Convert map_ptr_kern test to use light skeleton.
  selftests/bpf: Additional test for CO-RE in the kernel.
  selftests/bpf: Revert CO-RE removal in test_ksyms_weak.
  selftests/bpf: Add CO-RE relocations to verifier scale test.
====================

Signed-off-by: default avatarAndrii Nakryiko <andrii@kernel.org>
parents 64b5b97b 098dc533
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1732,6 +1732,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn);
struct bpf_core_ctx {
	struct bpf_verifier_log *log;
	const struct btf *btf;
};

int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
		   int relo_idx, void *insn);

#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
+85 −4
Original line number Diff line number Diff line
@@ -144,6 +144,53 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}

static inline bool str_is_empty(const char *s)
{
	return !s || !s[0];
}

static inline u16 btf_kind(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info);
}

static inline bool btf_is_enum(const struct btf_type *t)
{
	return btf_kind(t) == BTF_KIND_ENUM;
}

static inline bool btf_is_composite(const struct btf_type *t)
{
	u16 kind = btf_kind(t);

	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
}

static inline bool btf_is_array(const struct btf_type *t)
{
	return btf_kind(t) == BTF_KIND_ARRAY;
}

static inline bool btf_is_int(const struct btf_type *t)
{
	return btf_kind(t) == BTF_KIND_INT;
}

static inline bool btf_is_ptr(const struct btf_type *t)
{
	return btf_kind(t) == BTF_KIND_PTR;
}

static inline u8 btf_int_offset(const struct btf_type *t)
{
	return BTF_INT_OFFSET(*(u32 *)(t + 1));
}

static inline u8 btf_int_encoding(const struct btf_type *t)
{
	return BTF_INT_ENCODING(*(u32 *)(t + 1));
}

static inline bool btf_type_is_scalar(const struct btf_type *t)
{
	return btf_type_is_int(t) || btf_type_is_enum(t);
@@ -184,6 +231,11 @@ static inline u16 btf_type_vlen(const struct btf_type *t)
	return BTF_INFO_VLEN(t->info);
}

static inline u16 btf_vlen(const struct btf_type *t)
{
	return btf_type_vlen(t);
}

static inline u16 btf_func_linkage(const struct btf_type *t)
{
	return BTF_INFO_VLEN(t->info);
@@ -194,25 +246,54 @@ static inline bool btf_type_kflag(const struct btf_type *t)
	return BTF_INFO_KFLAG(t->info);
}

static inline u32 btf_member_bit_offset(const struct btf_type *struct_type,
static inline u32 __btf_member_bit_offset(const struct btf_type *struct_type,
					  const struct btf_member *member)
{
	return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset)
					   : member->offset;
}

static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type,
static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type,
					     const struct btf_member *member)
{
	return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset)
					   : 0;
}

static inline struct btf_member *btf_members(const struct btf_type *t)
{
	return (struct btf_member *)(t + 1);
}

static inline u32 btf_member_bit_offset(const struct btf_type *t, u32 member_idx)
{
	const struct btf_member *m = btf_members(t) + member_idx;

	return __btf_member_bit_offset(t, m);
}

static inline u32 btf_member_bitfield_size(const struct btf_type *t, u32 member_idx)
{
	const struct btf_member *m = btf_members(t) + member_idx;

	return __btf_member_bitfield_size(t, m);
}

static inline const struct btf_member *btf_type_member(const struct btf_type *t)
{
	return (const struct btf_member *)(t + 1);
}

static inline struct btf_array *btf_array(const struct btf_type *t)
{
	return (struct btf_array *)(t + 1);
}

static inline struct btf_enum *btf_enum(const struct btf_type *t)
{
	return (struct btf_enum *)(t + 1);
}

static inline const struct btf_var_secinfo *btf_type_var_secinfo(
		const struct btf_type *t)
{
+77 −1
Original line number Diff line number Diff line
@@ -1342,8 +1342,10 @@ union bpf_attr {
			/* or valid module BTF object fd or 0 to attach to vmlinux */
			__u32		attach_btf_obj_fd;
		};
		__u32		:32;		/* pad */
		__u32		core_relo_cnt;	/* number of bpf_core_relo */
		__aligned_u64	fd_array;	/* array of FDs */
		__aligned_u64	core_relos;
		__u32		core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
	};

	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -6374,4 +6376,78 @@ enum {
	BTF_F_ZERO	=	(1ULL << 3),
};

/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
 * has to be adjusted by relocations. It is emitted by llvm and passed to
 * libbpf and later to the kernel.
 */
enum bpf_core_relo_kind {
	BPF_CORE_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
	BPF_CORE_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
	BPF_CORE_FIELD_EXISTS = 2,           /* field existence in target kernel */
	BPF_CORE_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
	BPF_CORE_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
	BPF_CORE_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
	BPF_CORE_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
	BPF_CORE_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
	BPF_CORE_TYPE_EXISTS = 8,            /* type existence in target kernel */
	BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
	BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
};

/*
 * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf
 * and from libbpf to the kernel.
 *
 * CO-RE relocation captures the following data:
 * - insn_off - instruction offset (in bytes) within a BPF program that needs
 *   its insn->imm field to be relocated with actual field info;
 * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
 *   type or field;
 * - access_str_off - offset into corresponding .BTF string section. String
 *   interpretation depends on specific relocation kind:
 *     - for field-based relocations, string encodes an accessed field using
 *       a sequence of field and array indices, separated by colon (:). It's
 *       conceptually very close to LLVM's getelementptr ([0]) instruction's
 *       arguments for identifying offset to a field.
 *     - for type-based relocations, strings is expected to be just "0";
 *     - for enum value-based relocations, string contains an index of enum
 *       value within its enum type;
 * - kind - one of enum bpf_core_relo_kind;
 *
 * Example:
 *   struct sample {
 *       int a;
 *       struct {
 *           int b[10];
 *       };
 *   };
 *
 *   struct sample *s = ...;
 *   int *x = &s->a;     // encoded as "0:0" (a is field #0)
 *   int *y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
 *                       // b is field #0 inside anon struct, accessing elem #5)
 *   int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
 *
 * type_id for all relocs in this example will capture BTF type id of
 * `struct sample`.
 *
 * Such relocation is emitted when using __builtin_preserve_access_index()
 * Clang built-in, passing expression that captures field address, e.g.:
 *
 * bpf_probe_read(&dst, sizeof(dst),
 *		  __builtin_preserve_access_index(&src->a.b.c));
 *
 * In this case Clang will emit field relocation recording necessary data to
 * be able to find offset of embedded `a.b.c` field within `src` struct.
 *
 * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
 */
struct bpf_core_relo {
	__u32 insn_off;
	__u32 type_id;
	__u32 access_str_off;
	enum bpf_core_relo_kind kind;
};

#endif /* _UAPI__LINUX_BPF_H__ */
+4 −0
Original line number Diff line number Diff line
@@ -36,3 +36,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
obj-${CONFIG_BPF_LSM} += bpf_lsm.o
endif
obj-$(CONFIG_BPF_PRELOAD) += preload/

obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE
	$(call if_changed_rule,cc_o_c)
+3 −3
Original line number Diff line number Diff line
@@ -165,7 +165,7 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
				break;
			}

			if (btf_member_bitfield_size(t, member)) {
			if (__btf_member_bitfield_size(t, member)) {
				pr_warn("bit field member %s in struct %s is not supported\n",
					mname, st_ops->name);
				break;
@@ -296,7 +296,7 @@ static int check_zero_holes(const struct btf_type *t, void *data)
	const struct btf_type *mtype;

	for_each_member(i, t, member) {
		moff = btf_member_bit_offset(t, member) / 8;
		moff = __btf_member_bit_offset(t, member) / 8;
		if (moff > prev_mend &&
		    memchr_inv(data + prev_mend, 0, moff - prev_mend))
			return -EINVAL;
@@ -387,7 +387,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
		struct bpf_prog *prog;
		u32 moff;

		moff = btf_member_bit_offset(t, member) / 8;
		moff = __btf_member_bit_offset(t, member) / 8;
		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
		if (ptype == module_type) {
			if (*(void **)(udata + moff))
Loading