Commit ab850abb authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Add SEC("ksyscall") support'



Andrii Nakryiko says:

====================

Add SEC("ksyscall")/SEC("kretsyscall") sections and corresponding
bpf_program__attach_ksyscall() API that simplifies tracing kernel syscalls
through kprobe mechanism. Kprobing syscalls isn't trivial due to varying
syscall handler names in the kernel and various ways syscall argument are
passed, depending on kernel architecture and configuration. SEC("ksyscall")
allows user to not care about such details and just get access to syscall
input arguments, while libbpf takes care of necessary feature detection logic.

There are still more quirks that are not straightforward to hide completely
(see comments about mmap(), clone() and compat syscalls), so in such more
advanced scenarios user might need to fall back to plain SEC("kprobe")
approach, but for absolute majority of users SEC("ksyscall") is a big
improvement.

As part of this patch set libbpf adds two more virtual __kconfig externs, in
addition to existing LINUX_KERNEL_VERSION: LINUX_HAS_BPF_COOKIE and
LINUX_HAS_SYSCALL_WRAPPER, which let's libbpf-provided BPF-side code minimize
external dependencies and assumptions and let's user-space part of libbpf to
perform all the feature detection logic. This benefits USDT support code,
which now doesn't depend on BPF CO-RE for its functionality.

v1->v2:
  - normalize extern variable-related warn and debug message formats (Alan);
rfc->v1:
  - drop dependency on kallsyms and speed up SYSCALL_WRAPPER detection (Alexei);
  - drop dependency on /proc/config.gz in bpf_tracing.h (Yaniv);
  - add doc comment and ephasize mmap(), clone() and compat quirks that are
    not supported (Ilya);
  - use mechanism similar to LINUX_KERNEL_VERSION to also improve USDT code.
====================

Reviewed-by: default avatarStanislav Fomichev <sdf@google.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 9ff5efde d814ed62
Loading
Loading
Loading
Loading
+38 −13
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@
#ifndef __BPF_TRACING_H__
#define __BPF_TRACING_H__

#include <bpf/bpf_helpers.h>

/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
#if defined(__TARGET_ARCH_x86)
	#define bpf_target_x86
@@ -140,7 +142,7 @@ struct pt_regs___s390 {
#define __PT_RC_REG gprs[2]
#define __PT_SP_REG gprs[15]
#define __PT_IP_REG psw.addr
#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)

#elif defined(bpf_target_arm)
@@ -174,7 +176,7 @@ struct pt_regs___arm64 {
#define __PT_RC_REG regs[0]
#define __PT_SP_REG sp
#define __PT_IP_REG pc
#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)

#elif defined(bpf_target_mips)
@@ -493,16 +495,26 @@ typeof(name(0)) name(struct pt_regs *ctx) \
}									    \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)

/* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */
#define ___bpf_syscall_args0()           ctx
#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs)
#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs)
#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)

/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
#define ___bpf_syswrap_args0()           ctx
#define ___bpf_syswrap_args1(x)          ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
#define ___bpf_syswrap_args(args...)     ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)

/*
 * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
 * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for
 * tracing syscall functions, like __x64_sys_close. It hides the underlying
 * platform-specific low-level way of getting syscall input arguments from
 * struct pt_regs, and provides a familiar typed and named function arguments
@@ -511,21 +523,34 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 * Original struct pt_regs * context is preserved as 'ctx' argument. This might
 * be necessary when using BPF helpers like bpf_perf_event_output().
 *
 * This macro relies on BPF CO-RE support.
 * At the moment BPF_KSYSCALL does not handle all the calling convention
 * quirks for mmap(), clone() and compat syscalls transparrently. This may or
 * may not change in the future. User needs to take extra measures to handle
 * such quirks explicitly, if necessary.
 *
 * This macro relies on BPF CO-RE support and virtual __kconfig externs.
 */
#define BPF_KPROBE_SYSCALL(name, args...)				    \
#define BPF_KSYSCALL(name, args...)					    \
name(struct pt_regs *ctx);						    \
extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig;			    \
static __attribute__((always_inline)) typeof(name(0))			    \
____##name(struct pt_regs *ctx, ##args);				    \
typeof(name(0)) name(struct pt_regs *ctx)				    \
{									    \
	struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);		    \
	struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER		    \
			       ? (struct pt_regs *)PT_REGS_PARM1(ctx)	    \
			       : ctx;					    \
	_Pragma("GCC diagnostic push")					    \
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
	if (LINUX_HAS_SYSCALL_WRAPPER)					    \
		return ____##name(___bpf_syswrap_args(args));		    \
	else								    \
		return ____##name(___bpf_syscall_args(args));		    \
	_Pragma("GCC diagnostic pop")					    \
}									    \
static __attribute__((always_inline)) typeof(name(0))			    \
____##name(struct pt_regs *ctx, ##args)

#define BPF_KPROBE_SYSCALL BPF_KSYSCALL

#endif
+174 −40
Original line number Diff line number Diff line
@@ -1694,7 +1694,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
	switch (ext->kcfg.type) {
	case KCFG_BOOL:
		if (value == 'm') {
			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
			pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
				ext->name, value);
			return -EINVAL;
		}
@@ -1715,7 +1715,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
	case KCFG_INT:
	case KCFG_CHAR_ARR:
	default:
		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
		pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
			ext->name, value);
		return -EINVAL;
	}
@@ -1729,7 +1729,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
	size_t len;

	if (ext->kcfg.type != KCFG_CHAR_ARR) {
		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
		pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
			ext->name, value);
		return -EINVAL;
	}

@@ -1743,7 +1744,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
	/* strip quotes */
	len -= 2;
	if (len >= ext->kcfg.sz) {
		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
		pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
			ext->name, value, len, ext->kcfg.sz - 1);
		len = ext->kcfg.sz - 1;
	}
@@ -1800,13 +1801,20 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
			      __u64 value)
{
	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
		pr_warn("extern (kcfg) %s=%llu should be integer\n",
	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
	    ext->kcfg.type != KCFG_BOOL) {
		pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
			ext->name, (unsigned long long)value);
		return -EINVAL;
	}
	if (ext->kcfg.type == KCFG_BOOL && value > 1) {
		pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
			ext->name, (unsigned long long)value);
		return -EINVAL;

	}
	if (!is_kcfg_value_in_range(ext, value)) {
		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
		pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
			ext->name, (unsigned long long)value, ext->kcfg.sz);
		return -ERANGE;
	}
@@ -1870,16 +1878,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
		/* assume integer */
		err = parse_u64(value, &num);
		if (err) {
			pr_warn("extern (kcfg) %s=%s should be integer\n",
				ext->name, value);
			pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
			return err;
		}
		if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
			pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
			return -EINVAL;
		}
		err = set_kcfg_value_num(ext, ext_val, num);
		break;
	}
	if (err)
		return err;
	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
	pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
	return 0;
}

@@ -3687,7 +3698,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
						        &ext->kcfg.is_signed);
			if (ext->kcfg.type == KCFG_UNKNOWN) {
				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
				pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
				return -ENOTSUP;
			}
		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
@@ -4659,6 +4670,8 @@ static int probe_kern_btf_enum64(void)
					     strs, sizeof(strs)));
}

static int probe_kern_syscall_wrapper(void);

enum kern_feature_result {
	FEAT_UNKNOWN = 0,
	FEAT_SUPPORTED = 1,
@@ -4727,6 +4740,9 @@ static struct kern_feature_desc {
	[FEAT_BTF_ENUM64] = {
		"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
	},
	[FEAT_SYSCALL_WRAPPER] = {
		"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
	},
};

bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -7287,14 +7303,14 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
		return 0;

	if (ext->is_set && ext->ksym.addr != sym_addr) {
		pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
		pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
			sym_name, ext->ksym.addr, sym_addr);
		return -EINVAL;
	}
	if (!ext->is_set) {
		ext->is_set = true;
		ext->ksym.addr = sym_addr;
		pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
		pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
	}
	return 0;
}
@@ -7498,28 +7514,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
	for (i = 0; i < obj->nr_extern; i++) {
		ext = &obj->externs[i];

		if (ext->type == EXT_KCFG &&
		    strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
			void *ext_val = kcfg_data + ext->kcfg.data_off;
			__u32 kver = get_kernel_version();
		if (ext->type == EXT_KSYM) {
			if (ext->ksym.type_id)
				need_vmlinux_btf = true;
			else
				need_kallsyms = true;
			continue;
		} else if (ext->type == EXT_KCFG) {
			void *ext_ptr = kcfg_data + ext->kcfg.data_off;
			__u64 value = 0;

			/* Kconfig externs need actual /proc/config.gz */
			if (str_has_pfx(ext->name, "CONFIG_")) {
				need_config = true;
				continue;
			}

			if (!kver) {
				pr_warn("failed to get kernel version\n");
			/* Virtual kcfg externs are customly handled by libbpf */
			if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
				value = get_kernel_version();
				if (!value) {
					pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
					return -EINVAL;
				}
			err = set_kcfg_value_num(ext, ext_val, kver);
			} else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
				value = kernel_supports(obj, FEAT_BPF_COOKIE);
			} else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
				value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
			} else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
				/* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
				 * __kconfig externs, where LINUX_ ones are virtual and filled out
				 * customly by libbpf (their values don't come from Kconfig).
				 * If LINUX_xxx variable is not recognized by libbpf, but is marked
				 * __weak, it defaults to zero value, just like for CONFIG_xxx
				 * externs.
				 */
				pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
				return -EINVAL;
			}

			err = set_kcfg_value_num(ext, ext_ptr, value);
			if (err)
				return err;
			pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
		} else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) {
			need_config = true;
		} else if (ext->type == EXT_KSYM) {
			if (ext->ksym.type_id)
				need_vmlinux_btf = true;
			else
				need_kallsyms = true;
			pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
				 ext->name, (long long)value);
		} else {
			pr_warn("unrecognized extern '%s'\n", ext->name);
			pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
			return -EINVAL;
		}
	}
@@ -7555,10 +7595,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
		ext = &obj->externs[i];

		if (!ext->is_set && !ext->is_weak) {
			pr_warn("extern %s (strong) not resolved\n", ext->name);
			pr_warn("extern '%s' (strong): not resolved\n", ext->name);
			return -ESRCH;
		} else if (!ext->is_set) {
			pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
			pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
				 ext->name);
		}
	}
@@ -8386,6 +8426,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log

static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -8406,6 +8447,8 @@ static const struct bpf_sec_def section_defs[] = {
	SEC_DEF("uretprobe.s+",		KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
	SEC_DEF("kprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("ksyscall+",		KPROBE,	0, SEC_NONE, attach_ksyscall),
	SEC_DEF("kretsyscall+",		KPROBE, 0, SEC_NONE, attach_ksyscall),
	SEC_DEF("usdt+",		KPROBE,	0, SEC_NONE, attach_usdt),
	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE),
@@ -9762,7 +9805,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
{
	struct perf_event_attr attr = {};
	char errmsg[STRERR_BUFSIZE];
	int type, pfd, err;
	int type, pfd;

	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
		return -EINVAL;
@@ -9798,14 +9841,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
		      pid < 0 ? -1 : pid /* pid */,
		      pid == -1 ? 0 : -1 /* cpu */,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0) {
		err = -errno;
		pr_warn("%s perf_event_open() failed: %s\n",
			uprobe ? "uprobe" : "kprobe",
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return err;
	}
	return pfd;
	return pfd >= 0 ? pfd : -errno;
}

static int append_to_file(const char *file, const char *fmt, ...)
@@ -9910,6 +9946,60 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
	return err;
}

static const char *arch_specific_syscall_pfx(void)
{
#if defined(__x86_64__)
	return "x64";
#elif defined(__i386__)
	return "ia32";
#elif defined(__s390x__)
	return "s390x";
#elif defined(__s390__)
	return "s390";
#elif defined(__arm__)
	return "arm";
#elif defined(__aarch64__)
	return "arm64";
#elif defined(__mips__)
	return "mips";
#elif defined(__riscv)
	return "riscv";
#else
	return NULL;
#endif
}

static int probe_kern_syscall_wrapper(void)
{
	char syscall_name[64];
	const char *ksys_pfx;

	ksys_pfx = arch_specific_syscall_pfx();
	if (!ksys_pfx)
		return 0;

	snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);

	if (determine_kprobe_perf_type() >= 0) {
		int pfd;

		pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
		if (pfd >= 0)
			close(pfd);

		return pfd >= 0 ? 1 : 0;
	} else { /* legacy mode */
		char probe_name[128];

		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
		if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
			return 0;

		(void)remove_kprobe_event_legacy(probe_name, false);
		return 1;
	}
}

struct bpf_link *
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
				const char *func_name,
@@ -9995,6 +10085,29 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}

struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
					      const char *syscall_name,
					      const struct bpf_ksyscall_opts *opts)
{
	LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
	char func_name[128];

	if (!OPTS_VALID(opts, bpf_ksyscall_opts))
		return libbpf_err_ptr(-EINVAL);

	if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
		snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
			 arch_specific_syscall_pfx(), syscall_name);
	} else {
		snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
	}

	kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
	kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
}

/* Adapted from perf/util/string.c */
static bool glob_match(const char *str, const char *pat)
{
@@ -10165,6 +10278,27 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf
	return libbpf_get_error(*link);
}

static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	LIBBPF_OPTS(bpf_ksyscall_opts, opts);
	const char *syscall_name;

	*link = NULL;

	/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
	if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
		return 0;

	opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
	if (opts.retprobe)
		syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
	else
		syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;

	*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
	return *link ? 0 : -errno;
}

static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+46 −0
Original line number Diff line number Diff line
@@ -457,6 +457,52 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
				      const char *pattern,
				      const struct bpf_kprobe_multi_opts *opts);

struct bpf_ksyscall_opts {
	/* size of this struct, for forward/backward compatiblity */
	size_t sz;
	/* custom user-provided value fetchable through bpf_get_attach_cookie() */
	__u64 bpf_cookie;
	/* attach as return probe? */
	bool retprobe;
	size_t :0;
};
#define bpf_ksyscall_opts__last_field retprobe

/**
 * @brief **bpf_program__attach_ksyscall()** attaches a BPF program
 * to kernel syscall handler of a specified syscall. Optionally it's possible
 * to request to install retprobe that will be triggered at syscall exit. It's
 * also possible to associate BPF cookie (though options).
 *
 * Libbpf automatically will determine correct full kernel function name,
 * which depending on system architecture and kernel version/configuration
 * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will
 * attach specified program using kprobe/kretprobe mechanism.
 *
 * **bpf_program__attach_ksyscall()** is an API counterpart of declarative
 * **SEC("ksyscall/<syscall>")** annotation of BPF programs.
 *
 * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do
 * not handle all the calling convention quirks for mmap(), clone() and compat
 * syscalls. It also only attaches to "native" syscall interfaces. If host
 * system supports compat syscalls or defines 32-bit syscalls in 64-bit
 * kernel, such syscall interfaces won't be attached to by libbpf.
 *
 * These limitations may or may not change in the future. Therefore it is
 * recommended to use SEC("kprobe") for these syscalls or if working with
 * compat and 32-bit interfaces is required.
 *
 * @param prog BPF program to attach
 * @param syscall_name Symbolic name of the syscall (e.g., "bpf")
 * @param opts Additional options (see **struct bpf_ksyscall_opts**)
 * @return Reference to the newly created BPF link; or NULL is returned on
 * error, error code is stored in errno
 */
LIBBPF_API struct bpf_link *
bpf_program__attach_ksyscall(const struct bpf_program *prog,
			     const char *syscall_name,
			     const struct bpf_ksyscall_opts *opts);

struct bpf_uprobe_opts {
	/* size of this struct, for forward/backward compatiblity */
	size_t sz;
+1 −0
Original line number Diff line number Diff line
@@ -356,6 +356,7 @@ LIBBPF_0.8.0 {
LIBBPF_1.0.0 {
	global:
		bpf_prog_query_opts;
		bpf_program__attach_ksyscall;
		btf__add_enum64;
		btf__add_enum64_value;
		libbpf_bpf_attach_type_str;
+2 −0
Original line number Diff line number Diff line
@@ -352,6 +352,8 @@ enum kern_feature_id {
	FEAT_BPF_COOKIE,
	/* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */
	FEAT_BTF_ENUM64,
	/* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */
	FEAT_SYSCALL_WRAPPER,
	__FEAT_CNT,
};

Loading