Commit 0ac10dc1 authored by Alexei Starovoitov
Browse files

Merge branch 'bpf_iter-uapi-fix'



Yonghong Song says:

====================
Andrii raised a concern that the current uapi for the bpf iterator map
element is a little restrictive and not suitable for potential future
complex customization. This is a valid suggestion, considering people
may indeed add more complex customization to the iterator, e.g.,
cgroup_id + user_id, etc. for task or task_file. Another example might
be map_id plus additional control so that the bpf iterator may bail
out of a bucket early if the bucket has too many elements, which could
hold the lock too long and impact other parts of the system.

Patch #1 modified uapi with kernel changes. Patch #2
adjusted libbpf api accordingly.

Changelogs:
  v3 -> v4:
    . add a forward declaration of bpf_iter_link_info in
      tools/lib/bpf/bpf.h in case that libbpf is built against
      not-latest uapi bpf.h.
    . target the patch set to "bpf" instead of "bpf-next"
  v2 -> v3:
    . undo "not reject iter_info.map.map_fd == 0" from v1.
      In the future map_fd may become optional, so let us use map_fd == 0
      indicating the map_fd is not set by user space.
    . add link_info_len to bpf_iter_attach_opts to ensure always correct
      link_info_len from user. Otherwise, libbpf may deduce incorrect
      link_info_len if it uses different uapi header than the user app.
  v1 -> v2:
    . ensure link_create target_fd/flags == 0 since they are not used. (Andrii)
    . if either of iter_info ptr == 0 or iter_info_len == 0, but not both,
      return error to user space. (Andrii)
    . do not reject iter_info.map.map_fd == 0, go ahead to use it trying to
      get a map reference since the map_fd is required for map_elem iterator.
    . use bpf_iter_link_info in bpf_iter_attach_opts instead of map_fd.
      this way, user space is responsible to set up bpf_iter_link_info and
      libbpf just passes the data to the kernel, simplifying libbpf design.
      (Andrii)
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6bcaf41f 74fc097d
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
	struct bpf_map *map;
};

typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
					union bpf_iter_link_info *linfo,
					struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);

#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
	const char *target;
	bpf_iter_check_target_t check_target;
	bpf_iter_attach_target_t attach_target;
	bpf_iter_detach_target_t detach_target;
	u32 ctx_arg_info_size;
	enum bpf_iter_link_info req_linfo;
	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
	const struct bpf_iter_seq_info *seq_info;
};
+8 −7
Original line number Diff line number Diff line
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
	__u32	attach_type;		/* program attach type */
};

/* Extra, target-specific parameters for creating a bpf iterator link.
 * User space passes a pointer to this union and its length through
 * link_create.iter_info / link_create.iter_info_len.
 */
union bpf_iter_link_info {
	struct {
		/* fd of the map to iterate; 0 means "not set" and is
		 * rejected with -EBADF by the map element iterator.
		 */
		__u32	map_fd;
	} map;
};

/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
	MAX_BPF_LINK_TYPE,
};

enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
};

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
 *
 * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
		};
		__u32		attach_type;	/* attach type */
		__u32		flags;		/* extra flags */
		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
		__u32		iter_info_len;	/* iter_info length */
	} link_create;

	struct { /* struct used by BPF_LINK_UPDATE command */
+29 −29
Original line number Diff line number Diff line
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->aux.map)
		bpf_map_put_with_uref(iter_link->aux.map);
	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)

int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	union bpf_iter_link_info __user *ulinfo;
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_aux_info aux = {};
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, target_fd;
	u32 prog_btf_id, linfo_len;
	bool existed = false;
	struct bpf_map *map;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
	linfo_len = attr->link_create.iter_info_len;
	if (!ulinfo ^ !linfo_len)
		return -EINVAL;

	if (ulinfo) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_user(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
	if (!existed)
		return -ENOENT;

	/* Make sure user supplied flags are target expected. */
	target_fd = attr->link_create.target_fd;
	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
		return -EINVAL;
	if (!attr->link_create.flags && target_fd)
		return -EINVAL;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;
@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
		return err;
	}

	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
		map = bpf_map_get_with_uref(target_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto cleanup_link;
		}

		aux.map = map;
		err = tinfo->reg_info->check_target(prog, &aux);
	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_map_put_with_uref(map);
			goto cleanup_link;
			bpf_link_cleanup(&link_primer);
			return err;
		}

		link->aux.map = map;
	}

	return bpf_link_settle(&link_primer);

cleanup_link:
	bpf_link_cleanup(&link_primer);
	return err;
}

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
+29 −8
Original line number Diff line number Diff line
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
	.seq_info		= &bpf_map_seq_info,
};

static int bpf_iter_check_map(struct bpf_prog *prog,
static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	u32 key_acc_size, value_acc_size, key_size, value_size;
	struct bpf_map *map = aux->map;
	struct bpf_map *map;
	bool is_percpu = false;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
	else if (map->map_type != BPF_MAP_TYPE_HASH &&
		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
		 map->map_type != BPF_MAP_TYPE_ARRAY)
		return -EINVAL;
		goto put_map;

	key_acc_size = prog->aux->max_rdonly_access;
	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
	else
		value_size = round_up(map->value_size, 8) * num_possible_cpus();

	if (key_acc_size > key_size || value_acc_size > value_size)
		return -EACCES;
	if (key_acc_size > key_size || value_acc_size > value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

/* detach_target callback for the "bpf_map_elem" iterator: drops the map
 * reference (including the uref) that bpf_iter_attach_map() took and
 * stored in aux->map on successful attach.
 */
static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,

static const struct bpf_iter_reg bpf_map_elem_reg_info = {
	.target			= "bpf_map_elem",
	.check_target		= bpf_iter_check_map,
	.req_linfo		= BPF_ITER_LINK_MAP_FD,
	.attach_target		= bpf_iter_attach_map,
	.detach_target		= bpf_iter_detach_map,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_map_elem, key),
+1 −1
Original line number Diff line number Diff line
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
	return -EINVAL;
}

#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
static int link_create(union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
Loading