Commit acfadf25 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'samples-bpf-make-bpf-programs-more-libbpf-aware'

Daniel T. Lee says:

====================
samples/bpf: make BPF programs more libbpf aware

The existing tracing programs have been developed for a considerable
period of time and, as a result, do not properly incorporate the
features of the current libbpf, such as CO-RE. This is evident in
frequent usage of functions like PT_REGS* and the persistence of "hack"
methods using underscore-style bpf_probe_read_kernel from the past.
These programs are far behind the current level of libbpf and can
potentially confuse users.

The kernel has undergone significant changes, and some of these changes
have broken these programs, but on the other hand, more robust APIs have
been developed for increased stableness.

To list some of the kernel changes that this patch set is focusing on,
- symbol mismatch occurs due to compiler optimization [1]
- inline of blk_account_io* breaks BPF kprobe program [2]
- new tracepoints for the block_io_start/done are introduced [3]
- map lookup probes can't be triggered (bpf_disable_instrumentation)[4]
- BPF_KSYSCALL has been introduced to simplify argument fetching [5]
- convert to vmlinux.h and use tp argument structure within it
- make tracing programs to be more CO-RE centric

In this regard, this patch set aims not only to integrate the latest
features of libbpf into BPF programs but also to reduce confusion and
clarify the BPF programs. This will help with the potential confusion
among users and make the programs more intutitive.

[1]: https://github.com/iovisor/bcc/issues/1754
[2]: https://github.com/iovisor/bcc/issues/4261
[3]: commit 5a80bd07 ("block: introduce block_io_start/block_io_done tracepoints")
[4]: commit 7c4cd051 ("bpf: Fix syscall's stackmap lookup potential deadlock")
[5]: commit 6f5d467d ("libbpf: improve BPF_KPROBE_SYSCALL macro and rename it to BPF_KSYSCALL")
====================

Link: https://lore.kernel.org/r/20230818090119.477441-1-danieltimlee@gmail.com


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 5bebd3e3 456d5355
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -124,21 +124,21 @@ always-y := $(tprogs-y)
always-y += sockex1_kern.o
always-y += sockex2_kern.o
always-y += sockex3_kern.o
always-y += tracex1_kern.o
always-y += tracex1.bpf.o
always-y += tracex2.bpf.o
always-y += tracex3_kern.o
always-y += tracex4_kern.o
always-y += tracex5_kern.o
always-y += tracex6_kern.o
always-y += tracex7_kern.o
always-y += tracex3.bpf.o
always-y += tracex4.bpf.o
always-y += tracex5.bpf.o
always-y += tracex6.bpf.o
always-y += tracex7.bpf.o
always-y += sock_flags.bpf.o
always-y += test_probe_write_user.bpf.o
always-y += trace_output.bpf.o
always-y += tcbpf1_kern.o
always-y += tc_l2_redirect_kern.o
always-y += lathist_kern.o
always-y += offwaketime_kern.o
always-y += spintest_kern.o
always-y += offwaketime.bpf.o
always-y += spintest.bpf.o
always-y += map_perf_test.bpf.o
always-y += test_overhead_tp.bpf.o
always-y += test_overhead_raw_tp.bpf.o
@@ -333,7 +333,7 @@ $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h

$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
@@ -440,7 +440,7 @@ $(obj)/%.o: $(src)/%.c
		-Wno-gnu-variable-sized-type-not-at-end \
		-Wno-address-of-packed-member -Wno-tautological-compare \
		-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
		-fno-asynchronous-unwind-tables \
		-fno-asynchronous-unwind-tables -fcf-protection \
		-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
		-O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \
		$(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
#define TC_ACT_OK		0
#define TC_ACT_SHOT		2

#define IFNAMSIZ 16

#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
	__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define bpf_ntohs(x)		__builtin_bswap16(x)
+11 −28
Original line number Diff line number Diff line
@@ -4,20 +4,15 @@
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include "vmlinux.h"
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#define _(P)                                                                   \
	({                                                                     \
		typeof(P) val;                                                 \
		bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
		val;                                                           \
	})
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH         127
#endif

#define MINBLOCK_US	1
#define MAX_ENTRIES	10000
@@ -67,11 +62,9 @@ struct {
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
	u32 pid = BPF_CORE_READ(p, pid);
	struct wokeby_t woke;
	u32 pid;

	pid = _(p->pid);

	bpf_get_current_comm(&woke.name, sizeof(woke.name));
	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
@@ -111,28 +104,18 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)

#if 1
/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[TASK_COMM_LEN];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[TASK_COMM_LEN];
	int next_pid;
	int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
int oncpu(struct trace_event_raw_sched_switch *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
SEC("kprobe.multi/finish_task_switch*")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
	/* record previous thread sleep time */
	u32 pid = _(p->pid);
	u32 pid = BPF_CORE_READ(p, pid);
#endif
	u64 delta, ts, *tsp;

+1 −1
Original line number Diff line number Diff line
@@ -105,7 +105,7 @@ int main(int argc, char **argv)
		return 2;
	}

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
	obj = bpf_object__open_file(filename, NULL);
	if (libbpf_get_error(obj)) {
		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+9 −18
Original line number Diff line number Diff line
@@ -4,14 +4,15 @@
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "vmlinux.h"
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/perf_event.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH         127
#endif

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, long);
@@ -46,20 +47,10 @@ int foo(struct pt_regs *ctx) \
}

/* add kprobes to all possible *spin* functions */
SEC("kprobe/spin_unlock")PROG(p1)
SEC("kprobe/spin_lock")PROG(p2)
SEC("kprobe/mutex_spin_on_owner")PROG(p3)
SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
SEC("kprobe/_raw_spin_unlock")PROG(p8)
SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
SEC("kprobe/_raw_spin_trylock")PROG(p12)
SEC("kprobe/_raw_spin_lock")PROG(p13)
SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
SEC("kprobe.multi/spin_*lock*")PROG(spin_lock)
SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner)
SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock)

/* and to inner bpf helpers */
SEC("kprobe/htab_map_update_elem")PROG(p15)
SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
Loading