Commit 89ae89f5 authored by Jiri Olsa's avatar Jiri Olsa Committed by Alexei Starovoitov
Browse files

bpf: Add multi uprobe link



Adding new multi uprobe link that allows to attach bpf program
to multiple uprobes.

Uprobes to attach are specified via new link_create uprobe_multi
union:

  struct {
    __aligned_u64   path;
    __aligned_u64   offsets;
    __aligned_u64   ref_ctr_offsets;
    __u32           cnt;
    __u32           flags;
  } uprobe_multi;

Uprobes are defined for single binary specified in path and multiple
calling sites specified in offsets array with optional reference
counters specified in ref_ctr_offsets array. All specified arrays
have length of 'cnt'.

The 'flags' supports single bit for now that marks the uprobe as
return probe.

Acked-by: default avatarAndrii Nakryiko <andrii@kernel.org>
Acked-by: default avatarYafang Shao <laoar.shao@gmail.com>
Signed-off-by: default avatarJiri Olsa <jolsa@kernel.org>
Acked-by: default avatarYonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20230809083440.3209381-4-jolsa@kernel.org


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 3505cb9f
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -752,6 +752,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr);
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -798,6 +799,11 @@ bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
static inline int
bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
#endif

enum {
+16 −0
Original line number Diff line number Diff line
@@ -1039,6 +1039,7 @@ enum bpf_attach_type {
	BPF_NETFILTER,
	BPF_TCX_INGRESS,
	BPF_TCX_EGRESS,
	BPF_TRACE_UPROBE_MULTI,
	__MAX_BPF_ATTACH_TYPE
};

@@ -1057,6 +1058,7 @@ enum bpf_link_type {
	BPF_LINK_TYPE_STRUCT_OPS = 9,
	BPF_LINK_TYPE_NETFILTER = 10,
	BPF_LINK_TYPE_TCX = 11,
	BPF_LINK_TYPE_UPROBE_MULTI = 12,
	MAX_BPF_LINK_TYPE,
};

@@ -1190,6 +1192,13 @@ enum {
	BPF_F_KPROBE_MULTI_RETURN = (1U << 0)
};

/* link_create.uprobe_multi.flags used in LINK_CREATE command for
 * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
 */
enum {
	BPF_F_UPROBE_MULTI_RETURN = (1U << 0)
};

/* link_create.netfilter.flags used in LINK_CREATE command for
 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 */
@@ -1626,6 +1635,13 @@ union bpf_attr {
				};
				__u64		expected_revision;
			} tcx;
			struct {
				__aligned_u64	path;
				__aligned_u64	offsets;
				__aligned_u64	ref_ctr_offsets;
				__u32		cnt;
				__u32		flags;
			} uprobe_multi;
		};
	} link_create;

+11 −3
Original line number Diff line number Diff line
@@ -2819,6 +2819,8 @@ static void bpf_link_free_id(int id)
 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
 * is not decremented, it's the responsibility of a calling code that failed
 * to complete bpf_link initialization.
 * This helper eventually calls link's dealloc callback, but does not call
 * link's release callback.
 */
void bpf_link_cleanup(struct bpf_link_primer *primer)
{
@@ -3756,8 +3758,12 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
		    attach_type != BPF_TRACE_KPROBE_MULTI)
			return -EINVAL;
		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
		    attach_type != BPF_TRACE_UPROBE_MULTI)
			return -EINVAL;
		if (attach_type != BPF_PERF_EVENT &&
		    attach_type != BPF_TRACE_KPROBE_MULTI)
		    attach_type != BPF_TRACE_KPROBE_MULTI &&
		    attach_type != BPF_TRACE_UPROBE_MULTI)
			return -EINVAL;
		return 0;
	case BPF_PROG_TYPE_SCHED_CLS:
@@ -4953,8 +4959,10 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type == BPF_PERF_EVENT)
			ret = bpf_perf_link_attach(attr, prog);
		else
		else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI)
			ret = bpf_kprobe_multi_link_attach(attr, prog);
		else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI)
			ret = bpf_uprobe_multi_link_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
+233 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>

#include <net/bpf_sk_storage.h>

@@ -2970,3 +2971,235 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
	return 0;
}
#endif

#ifdef CONFIG_UPROBES
struct bpf_uprobe_multi_link;

struct bpf_uprobe {
	struct bpf_uprobe_multi_link *link;
	loff_t offset;
	struct uprobe_consumer consumer;
};

struct bpf_uprobe_multi_link {
	struct path path;
	struct bpf_link link;
	u32 cnt;
	struct bpf_uprobe *uprobes;
};

struct bpf_uprobe_multi_run_ctx {
	struct bpf_run_ctx run_ctx;
	unsigned long entry_ip;
};

static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes,
				  u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++) {
		uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset,
				  &uprobes[i].consumer);
	}
}

static void bpf_uprobe_multi_link_release(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
}

static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	path_put(&umulti_link->path);
	kvfree(umulti_link->uprobes);
	kfree(umulti_link);
}

static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
	.release = bpf_uprobe_multi_link_release,
	.dealloc = bpf_uprobe_multi_link_dealloc,
};

static int uprobe_prog_run(struct bpf_uprobe *uprobe,
			   unsigned long entry_ip,
			   struct pt_regs *regs)
{
	struct bpf_uprobe_multi_link *link = uprobe->link;
	struct bpf_uprobe_multi_run_ctx run_ctx = {
		.entry_ip = entry_ip,
	};
	struct bpf_prog *prog = link->link.prog;
	bool sleepable = prog->aux->sleepable;
	struct bpf_run_ctx *old_run_ctx;
	int err = 0;

	if (sleepable)
		rcu_read_lock_trace();
	else
		rcu_read_lock();

	migrate_disable();

	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);

	migrate_enable();

	if (sleepable)
		rcu_read_unlock_trace();
	else
		rcu_read_unlock();
	return err;
}

static int
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
{
	struct bpf_uprobe *uprobe;

	uprobe = container_of(con, struct bpf_uprobe, consumer);
	return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
}

static int
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
{
	struct bpf_uprobe *uprobe;

	uprobe = container_of(con, struct bpf_uprobe, consumer);
	return uprobe_prog_run(uprobe, func, regs);
}

int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_uprobe_multi_link *link = NULL;
	unsigned long __user *uref_ctr_offsets;
	unsigned long *ref_ctr_offsets = NULL;
	struct bpf_link_primer link_primer;
	struct bpf_uprobe *uprobes = NULL;
	unsigned long __user *uoffsets;
	void __user *upath;
	u32 flags, cnt, i;
	struct path path;
	char *name;
	int err;

	/* no support for 32bit archs yet */
	if (sizeof(u64) != sizeof(void *))
		return -EOPNOTSUPP;

	if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
		return -EINVAL;

	flags = attr->link_create.uprobe_multi.flags;
	if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
		return -EINVAL;

	/*
	 * path, offsets and cnt are mandatory,
	 * ref_ctr_offsets is optional
	 */
	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
	cnt = attr->link_create.uprobe_multi.cnt;

	if (!upath || !uoffsets || !cnt)
		return -EINVAL;

	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);

	name = strndup_user(upath, PATH_MAX);
	if (IS_ERR(name)) {
		err = PTR_ERR(name);
		return err;
	}

	err = kern_path(name, LOOKUP_FOLLOW, &path);
	kfree(name);
	if (err)
		return err;

	if (!d_is_reg(path.dentry)) {
		err = -EBADF;
		goto error_path_put;
	}

	err = -ENOMEM;

	link = kzalloc(sizeof(*link), GFP_KERNEL);
	uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);

	if (!uprobes || !link)
		goto error_free;

	if (uref_ctr_offsets) {
		ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL);
		if (!ref_ctr_offsets)
			goto error_free;
	}

	for (i = 0; i < cnt; i++) {
		if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) {
			err = -EFAULT;
			goto error_free;
		}
		if (__get_user(uprobes[i].offset, uoffsets + i)) {
			err = -EFAULT;
			goto error_free;
		}

		uprobes[i].link = link;

		if (flags & BPF_F_UPROBE_MULTI_RETURN)
			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
		else
			uprobes[i].consumer.handler = uprobe_multi_link_handler;
	}

	link->cnt = cnt;
	link->uprobes = uprobes;
	link->path = path;

	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
		      &bpf_uprobe_multi_link_lops, prog);

	for (i = 0; i < cnt; i++) {
		err = uprobe_register_refctr(d_real_inode(link->path.dentry),
					     uprobes[i].offset,
					     ref_ctr_offsets ? ref_ctr_offsets[i] : 0,
					     &uprobes[i].consumer);
		if (err) {
			bpf_uprobe_unregister(&path, uprobes, i);
			goto error_free;
		}
	}

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error_free;

	kvfree(ref_ctr_offsets);
	return bpf_link_settle(&link_primer);

error_free:
	kvfree(ref_ctr_offsets);
	kvfree(uprobes);
	kfree(link);
error_path_put:
	path_put(&path);
	return err;
}
#else /* !CONFIG_UPROBES */
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_UPROBES */
+16 −0
Original line number Diff line number Diff line
@@ -1039,6 +1039,7 @@ enum bpf_attach_type {
	BPF_NETFILTER,
	BPF_TCX_INGRESS,
	BPF_TCX_EGRESS,
	BPF_TRACE_UPROBE_MULTI,
	__MAX_BPF_ATTACH_TYPE
};

@@ -1057,6 +1058,7 @@ enum bpf_link_type {
	BPF_LINK_TYPE_STRUCT_OPS = 9,
	BPF_LINK_TYPE_NETFILTER = 10,
	BPF_LINK_TYPE_TCX = 11,
	BPF_LINK_TYPE_UPROBE_MULTI = 12,
	MAX_BPF_LINK_TYPE,
};

@@ -1190,6 +1192,13 @@ enum {
	BPF_F_KPROBE_MULTI_RETURN = (1U << 0)
};

/* link_create.uprobe_multi.flags used in LINK_CREATE command for
 * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
 */
enum {
	BPF_F_UPROBE_MULTI_RETURN = (1U << 0)
};

/* link_create.netfilter.flags used in LINK_CREATE command for
 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 */
@@ -1626,6 +1635,13 @@ union bpf_attr {
				};
				__u64		expected_revision;
			} tcx;
			struct {
				__aligned_u64	path;
				__aligned_u64	offsets;
				__aligned_u64	ref_ctr_offsets;
				__u32		cnt;
				__u32		flags;
			} uprobe_multi;
		};
	} link_create;