Commit db559117 authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov
Browse files

bpf: Consolidate spin_lock, timer management into btf_record



Now that kptr_off_tab has been refactored into btf_record, and can hold
more than one specific field type, accommodate bpf_spin_lock and
bpf_timer as well.

While they don't require any more metadata than offset, having all
special fields in one place allows us to share the same code for
allocated user defined types and handle both map values and these
allocated objects in a similar fashion.

As an optimization, we still keep spin_lock_off and timer_off offsets in
the btf_record structure, just to avoid having to find the btf_field
struct each time their offset is needed. This is mostly needed to
manipulate such objects in a map value at runtime. It's ok to hardcode
just one offset as more than one field is disallowed.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-8-memxor@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent af085f55
Loading
Loading
Loading
Loading
+32 −21
Original line number Diff line number Diff line
@@ -166,13 +166,13 @@ struct bpf_map_ops {

enum {
	/* Support at most 8 pointers in a BTF type */
	BTF_FIELDS_MAX	      = 8,
	BPF_MAP_OFF_ARR_MAX   = BTF_FIELDS_MAX +
				1 + /* for bpf_spin_lock */
				1,  /* for bpf_timer */
	BTF_FIELDS_MAX	      = 10,
	BPF_MAP_OFF_ARR_MAX   = BTF_FIELDS_MAX,
};

enum btf_field_type {
	BPF_SPIN_LOCK  = (1 << 0),
	BPF_TIMER      = (1 << 1),
	BPF_KPTR_UNREF = (1 << 2),
	BPF_KPTR_REF   = (1 << 3),
	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
@@ -196,6 +196,8 @@ struct btf_field {
struct btf_record {
	u32 cnt;
	u32 field_mask;
	int spin_lock_off;
	int timer_off;
	struct btf_field fields[];
};

@@ -220,10 +222,8 @@ struct bpf_map {
	u32 max_entries;
	u64 map_extra; /* any per-map-type extra fields */
	u32 map_flags;
	int spin_lock_off; /* >=0 valid offset, <0 error */
	struct btf_record *record;
	int timer_off; /* >=0 valid offset, <0 error */
	u32 id;
	struct btf_record *record;
	int numa_node;
	u32 btf_key_type_id;
	u32 btf_value_type_id;
@@ -257,9 +257,29 @@ struct bpf_map {
	bool frozen; /* write-once; write-protected by freeze_mutex */
};

/* Map a btf_field_type flag to the BTF type name it corresponds to.
 * Both kptr flavors (referenced and unreferenced) report as "kptr".
 * An unrecognized value triggers a one-time warning and yields
 * "unknown" rather than crashing.
 */
static inline const char *btf_field_type_name(enum btf_field_type type)
{
	switch (type) {
	case BPF_SPIN_LOCK:
		return "bpf_spin_lock";
	case BPF_TIMER:
		return "bpf_timer";
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
		return "kptr";
	default:
		WARN_ON_ONCE(1);
		return "unknown";
	}
}

static inline u32 btf_field_type_size(enum btf_field_type type)
{
	switch (type) {
	case BPF_SPIN_LOCK:
		return sizeof(struct bpf_spin_lock);
	case BPF_TIMER:
		return sizeof(struct bpf_timer);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
		return sizeof(u64);
@@ -272,6 +292,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
static inline u32 btf_field_type_align(enum btf_field_type type)
{
	switch (type) {
	case BPF_SPIN_LOCK:
		return __alignof__(struct bpf_spin_lock);
	case BPF_TIMER:
		return __alignof__(struct bpf_timer);
	case BPF_KPTR_UNREF:
	case BPF_KPTR_REF:
		return __alignof__(u64);
@@ -288,22 +312,8 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
	return rec->field_mask & type;
}

/* True if the map's value type contains a struct bpf_spin_lock.
 * spin_lock_off caches its offset; a negative value encodes
 * absent/error (see the field's declaration comment).
 */
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
{
	return map->spin_lock_off >= 0;
}

/* True if the map's value type contains a struct bpf_timer.
 * timer_off caches its offset; a negative value encodes
 * absent/error (see the field's declaration comment).
 */
static inline bool map_value_has_timer(const struct bpf_map *map)
{
	return map->timer_off >= 0;
}

static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
	if (unlikely(map_value_has_spin_lock(map)))
		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
	if (unlikely(map_value_has_timer(map)))
		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
	if (!IS_ERR_OR_NULL(map->record)) {
		struct btf_field *fields = map->record->fields;
		u32 cnt = map->record->cnt;
@@ -1740,6 +1750,7 @@ void btf_record_free(struct btf_record *rec);
void bpf_map_free_record(struct bpf_map *map);
struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);

struct bpf_map *bpf_map_get(u32 ufd);
+2 −1
Original line number Diff line number Diff line
@@ -163,7 +163,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
			   u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
				    u32 field_mask, u32 value_size);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
+6 −13
Original line number Diff line number Diff line
@@ -306,13 +306,6 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
	return 0;
}

/* Release the special fields embedded in an array-map value:
 * cancel and free any bpf_timer at its cached offset, then hand
 * the remaining recorded fields to bpf_obj_free_fields().
 */
static void check_and_free_fields(struct bpf_array *arr, void *val)
{
	if (map_value_has_timer(&arr->map))
		bpf_timer_cancel_and_free(val + arr->map.timer_off);
	bpf_obj_free_fields(arr->map.record, val);
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
@@ -334,13 +327,13 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		     !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
		copy_map_value(map, val, value);
		check_and_free_fields(array, val);
		bpf_obj_free_fields(array->map.record, val);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
@@ -348,7 +341,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		check_and_free_fields(array, val);
		bpf_obj_free_fields(array->map.record, val);
	}
	return 0;
}
@@ -385,7 +378,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
		check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
		bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
		off += size;
	}
	rcu_read_unlock();
@@ -409,11 +402,11 @@ static void array_map_free_timers(struct bpf_map *map)
	int i;

	/* We don't reset or free fields other than timer on uref dropping to zero. */
	if (!map_value_has_timer(map))
	if (!btf_record_has_field(map->record, BPF_TIMER))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
		bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+1 −1
Original line number Diff line number Diff line
@@ -382,7 +382,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(&smap->map)))
		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
		return ERR_PTR(-EINVAL);

	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+177 −146
Original line number Diff line number Diff line
@@ -3205,16 +3205,20 @@ enum {
struct btf_field_info {
	enum btf_field_type type;
	u32 off;
	struct {
		u32 type_id;
	} kptr;
};

static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
			   u32 off, int sz, struct btf_field_info *info)
			   u32 off, int sz, enum btf_field_type field_type,
			   struct btf_field_info *info)
{
	if (!__btf_type_is_struct(t))
		return BTF_FIELD_IGNORE;
	if (t->size != sz)
		return BTF_FIELD_IGNORE;
	info->type = field_type;
	info->off = off;
	return BTF_FIELD_FOUND;
}
@@ -3251,28 +3255,66 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
	if (!__btf_type_is_struct(t))
		return -EINVAL;

	info->type_id = res_id;
	info->off = off;
	info->type = type;
	info->off = off;
	info->kptr.type_id = res_id;
	return BTF_FIELD_FOUND;
}

static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
				 const char *name, int sz, int align,
				 enum btf_field_info_type field_type,
/* Classify a member/variable by its BTF type name against the
 * caller-requested field_mask.
 *
 * Returns the matched BPF_* field flag (> 0) and fills *sz and
 * *align for it, 0 when the name matches nothing requested, or
 * -E2BIG when a once-only field (bpf_spin_lock, bpf_timer) is
 * encountered a second time — duplicates are tracked in *seen_mask.
 * Kptr is the fallback match tried only after all named types fail.
 */
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
			      int *align, int *sz)
{
	int type = 0;

	if (field_mask & BPF_SPIN_LOCK) {
		if (!strcmp(name, "bpf_spin_lock")) {
			if (*seen_mask & BPF_SPIN_LOCK)
				return -E2BIG;
			*seen_mask |= BPF_SPIN_LOCK;
			type = BPF_SPIN_LOCK;
			goto end;
		}
	}
	if (field_mask & BPF_TIMER) {
		if (!strcmp(name, "bpf_timer")) {
			if (*seen_mask & BPF_TIMER)
				return -E2BIG;
			*seen_mask |= BPF_TIMER;
			type = BPF_TIMER;
			goto end;
		}
	}
	/* Only return BPF_KPTR when all other types with matchable names fail */
	if (field_mask & BPF_KPTR) {
		type = BPF_KPTR_REF;
		goto end;
	}
	return 0;
end:
	/* Cache the matched type's size/alignment for the caller's
	 * offset validation.
	 */
	*sz = btf_field_type_size(type);
	*align = btf_field_type_align(type);
	return type;
}

static int btf_find_struct_field(const struct btf *btf,
				 const struct btf_type *t, u32 field_mask,
				 struct btf_field_info *info, int info_cnt)
{
	int ret, idx = 0, align, sz, field_type;
	const struct btf_member *member;
	struct btf_field_info tmp;
	int ret, idx = 0;
	u32 i, off;
	u32 i, off, seen_mask = 0;

	for_each_member(i, t, member) {
		const struct btf_type *member_type = btf_type_by_id(btf,
								    member->type);

		if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
		field_type = btf_get_field_type(__btf_name_by_offset(btf, member_type->name_off),
						field_mask, &seen_mask, &align, &sz);
		if (field_type == 0)
			continue;
		if (field_type < 0)
			return field_type;

		off = __btf_member_bit_offset(t, member);
		if (off % 8)
@@ -3280,17 +3322,18 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
			return -EINVAL;
		off /= 8;
		if (off % align)
			return -EINVAL;
			continue;

		switch (field_type) {
		case BTF_FIELD_SPIN_LOCK:
		case BTF_FIELD_TIMER:
			ret = btf_find_struct(btf, member_type, off, sz,
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
			ret = btf_find_struct(btf, member_type, off, sz, field_type,
					      idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
				return ret;
			break;
		case BTF_FIELD_KPTR:
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
			ret = btf_find_kptr(btf, member_type, off, sz,
					    idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
@@ -3310,37 +3353,41 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
}

static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
				const char *name, int sz, int align,
				enum btf_field_info_type field_type,
				struct btf_field_info *info, int info_cnt)
				u32 field_mask, struct btf_field_info *info,
				int info_cnt)
{
	int ret, idx = 0, align, sz, field_type;
	const struct btf_var_secinfo *vsi;
	struct btf_field_info tmp;
	int ret, idx = 0;
	u32 i, off;
	u32 i, off, seen_mask = 0;

	for_each_vsi(i, t, vsi) {
		const struct btf_type *var = btf_type_by_id(btf, vsi->type);
		const struct btf_type *var_type = btf_type_by_id(btf, var->type);

		off = vsi->offset;

		if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
		field_type = btf_get_field_type(__btf_name_by_offset(btf, var_type->name_off),
						field_mask, &seen_mask, &align, &sz);
		if (field_type == 0)
			continue;
		if (field_type < 0)
			return field_type;

		off = vsi->offset;
		if (vsi->size != sz)
			continue;
		if (off % align)
			return -EINVAL;
			continue;

		switch (field_type) {
		case BTF_FIELD_SPIN_LOCK:
		case BTF_FIELD_TIMER:
			ret = btf_find_struct(btf, var_type, off, sz,
		case BPF_SPIN_LOCK:
		case BPF_TIMER:
			ret = btf_find_struct(btf, var_type, off, sz, field_type,
					      idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
				return ret;
			break;
		case BTF_FIELD_KPTR:
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
			ret = btf_find_kptr(btf, var_type, off, sz,
					    idx < info_cnt ? &info[idx] : &tmp);
			if (ret < 0)
@@ -3360,107 +3407,38 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
}

static int btf_find_field(const struct btf *btf, const struct btf_type *t,
			  enum btf_field_info_type field_type,
			  struct btf_field_info *info, int info_cnt)
			  u32 field_mask, struct btf_field_info *info,
			  int info_cnt)
{
	const char *name;
	int sz, align;

	switch (field_type) {
	case BTF_FIELD_SPIN_LOCK:
		name = "bpf_spin_lock";
		sz = sizeof(struct bpf_spin_lock);
		align = __alignof__(struct bpf_spin_lock);
		break;
	case BTF_FIELD_TIMER:
		name = "bpf_timer";
		sz = sizeof(struct bpf_timer);
		align = __alignof__(struct bpf_timer);
		break;
	case BTF_FIELD_KPTR:
		name = NULL;
		sz = sizeof(u64);
		align = 8;
		break;
	default:
		return -EFAULT;
	}

	if (__btf_type_is_struct(t))
		return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt);
		return btf_find_struct_field(btf, t, field_mask, info, info_cnt);
	else if (btf_type_is_datasec(t))
		return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt);
		return btf_find_datasec_var(btf, t, field_mask, info, info_cnt);
	return -EINVAL;
}

/* find 'struct bpf_spin_lock' in map value.
 * return >= 0 offset if found
 * and < 0 in case of error
 */
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
			  struct btf_field_info *info)
{
	struct btf_field_info info;
	int ret;

	ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1);
	if (ret < 0)
		return ret;
	if (!ret)
		return -ENOENT;
	return info.off;
}

int btf_find_timer(const struct btf *btf, const struct btf_type *t)
{
	struct btf_field_info info;
	int ret;

	ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1);
	if (ret < 0)
		return ret;
	if (!ret)
		return -ENOENT;
	return info.off;
}

struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t)
{
	struct btf_field_info info_arr[BTF_FIELDS_MAX];
	struct btf *kernel_btf = NULL;
	struct module *mod = NULL;
	struct btf_record *rec;
	int ret, i, cnt;

	ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
	if (ret < 0)
		return ERR_PTR(ret);
	if (!ret)
		return NULL;

	cnt = ret;
	rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
	if (!rec)
		return ERR_PTR(-ENOMEM);
	rec->cnt = 0;
	for (i = 0; i < cnt; i++) {
	const struct btf_type *t;
	struct btf *kernel_btf;
	int ret;
	s32 id;

	/* Find type in map BTF, and use it to look up the matching type
	 * in vmlinux or module BTFs, by name and kind.
	 */
		t = btf_type_by_id(btf, info_arr[i].type_id);
	t = btf_type_by_id(btf, info->kptr.type_id);
	id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
			     &kernel_btf);
		if (id < 0) {
			ret = id;
			goto end;
		}
	if (id < 0)
		return id;

	/* Find and stash the function pointer for the destruction function that
	 * needs to be eventually invoked from the map free path.
	 */
		if (info_arr[i].type == BPF_KPTR_REF) {
	if (info->type == BPF_KPTR_REF) {
		const struct btf_type *dtor_func;
		const char *dtor_func_name;
		unsigned long addr;
@@ -3499,22 +3477,75 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
			ret = -EINVAL;
			goto end_mod;
		}
			rec->fields[i].kptr.dtor = (void *)addr;
		field->kptr.dtor = (void *)addr;
	}

	field->kptr.btf_id = id;
	field->kptr.btf = kernel_btf;
	field->kptr.module = mod;
	return 0;
end_mod:
	module_put(mod);
end_btf:
	btf_put(kernel_btf);
	return ret;
}

struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
				    u32 field_mask, u32 value_size)
{
	struct btf_field_info info_arr[BTF_FIELDS_MAX];
	struct btf_record *rec;
	int ret, i, cnt;

	ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
	if (ret < 0)
		return ERR_PTR(ret);
	if (!ret)
		return NULL;

	cnt = ret;
	rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
	if (!rec)
		return ERR_PTR(-ENOMEM);

	rec->spin_lock_off = -EINVAL;
	rec->timer_off = -EINVAL;
	for (i = 0; i < cnt; i++) {
		if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) {
			WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
			ret = -EFAULT;
			goto end;
		}

		rec->field_mask |= info_arr[i].type;
		rec->fields[i].offset = info_arr[i].off;
		rec->fields[i].type = info_arr[i].type;
		rec->fields[i].kptr.btf_id = id;
		rec->fields[i].kptr.btf = kernel_btf;
		rec->fields[i].kptr.module = mod;

		switch (info_arr[i].type) {
		case BPF_SPIN_LOCK:
			WARN_ON_ONCE(rec->spin_lock_off >= 0);
			/* Cache offset for faster lookup at runtime */
			rec->spin_lock_off = rec->fields[i].offset;
			break;
		case BPF_TIMER:
			WARN_ON_ONCE(rec->timer_off >= 0);
			/* Cache offset for faster lookup at runtime */
			rec->timer_off = rec->fields[i].offset;
			break;
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
			ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
			if (ret < 0)
				goto end;
			break;
		default:
			ret = -EFAULT;
			goto end;
		}
		rec->cnt++;
	}
	return rec;
end_mod:
	module_put(mod);
end_btf:
	btf_put(kernel_btf);
end:
	btf_record_free(rec);
	return ERR_PTR(ret);
Loading