Commit 6fda2405 authored by Namhyung Kim, committed by Arnaldo Carvalho de Melo
Browse files

perf lock: Implement cpu and task filters for BPF



Add -a/--all-cpus and -C/--cpu options for cpu filtering.  Also -p/--pid
and --tid options are added for task filtering.  The short -t option is
taken for --threads already.  Tracking the command line workload is
possible as well.

  $ sudo perf lock contention -a -b sleep 1

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Blake Jones <blakejones@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220729200756.666106-4-namhyung@kernel.org


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 407b36f6
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -128,6 +128,23 @@ CONTENTION OPTIONS
	Use BPF program to collect lock contention stats instead of
	using the input data.

-a::
--all-cpus::
	System-wide collection from all CPUs.

-C::
--cpu::
	Collect samples only on the list of CPUs provided. Multiple CPUs can be
	provided as a comma-separated list with no space: 0,1. Ranges of CPUs
	are specified with -: 0-2.  Default is to monitor all CPUs.

-p::
--pid=::
	Record events on existing process ID (comma separated list).

--tid=::
	Record events on existing thread ID (comma separated list).


SEE ALSO
--------
+46 −9
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"

@@ -38,6 +39,7 @@
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
@@ -1578,7 +1580,7 @@ static void sighandler(int sig __maybe_unused)
{
}

static int __cmd_contention(void)
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
@@ -1592,6 +1594,7 @@ static int __cmd_contention(void)
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct evlist *evlist = NULL;

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
@@ -1604,14 +1607,40 @@ static int __cmd_contention(void)
	symbol__init(&session->header.env);

	if (use_bpf) {
		if (lock_contention_prepare() < 0) {
			pr_err("lock contention BPF setup failed\n");
			return -1;
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		evlist = evlist__new();
		if (evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(evlist, &target) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;
@@ -1642,6 +1671,8 @@ static int __cmd_contention(void)

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(evlist);

		/* wait for signal */
		pause();
@@ -1660,6 +1691,7 @@ static int __cmd_contention(void)
	print_contention_result();

out_delete:
	evlist__delete(evlist);
	lock_contention_finish();
	perf_session__delete(session);
	return err;
@@ -1792,6 +1824,15 @@ int cmd_lock(int argc, const char **argv)
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	/* TODO: Add short option -t once -t/--threads no longer occupies it. */
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_PARENT(lock_options)
	};

@@ -1861,12 +1902,8 @@ int cmd_lock(int argc, const char **argv)
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
			if (argc) {
				usage_with_options(contention_usage,
						   contention_options);
			}
		}
		rc = __cmd_contention();
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}
+50 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/symbol.h"
#include "util/target.h"
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
#include <bpf/bpf.h>
@@ -24,19 +27,65 @@ struct lock_contention_data {
	u32 flags;
};

int lock_contention_prepare(void)
/*
 * Open, size, load, populate and attach the lock-contention BPF skeleton.
 *
 * @evlist: supplies the CPU map (core.user_requested_cpus), the thread map
 *          (core.threads) and the forked workload pid (workload.pid) that
 *          are turned into BPF-side filters.
 * @target: decides which filters are installed: a CPU filter when
 *          target__has_cpu(), a task filter when target__has_task(), and a
 *          single-entry task filter for the workload when target__none().
 *
 * Returns 0 on success and -1 on any failure.  On failure the skeleton is
 * left in the file-scope 'skel' pointer and is not destroyed here.
 * NOTE(review): presumably lock_contention_finish() releases it - confirm.
 */
int lock_contention_prepare(struct evlist *evlist, struct target *target)
{
	int i, fd;
	int ncpus = 1, ntasks = 1;

	skel = lock_contention_bpf__open();
	if (!skel) {
		pr_err("Failed to open lock-contention BPF skeleton\n");
		return -1;
	}

	if (target__has_cpu(target))
		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
	if (target__has_task(target))
		ntasks = perf_thread_map__nr(evlist->core.threads);

	/* The filter maps are resized here, before the skeleton is loaded. */
	if (bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus) < 0 ||
	    bpf_map__set_max_entries(skel->maps.task_filter, ntasks) < 0) {
		pr_err("Failed to resize lock-contention filter maps\n");
		return -1;
	}

	if (lock_contention_bpf__load(skel) < 0) {
		pr_err("Failed to load lock-contention BPF skeleton\n");
		return -1;
	}

	if (target__has_cpu(target)) {
		u32 cpu;
		u8 val = 1;

		skel->bss->has_cpu = 1;
		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
			/*
			 * has_cpu is already set, so a missing entry would
			 * silently drop every event from that CPU.
			 */
			if (bpf_map_update_elem(fd, &cpu, &val, BPF_ANY) < 0) {
				pr_err("Failed to update lock-contention cpu filter\n");
				return -1;
			}
		}
	}

	if (target__has_task(target)) {
		u32 pid;
		u8 val = 1;

		skel->bss->has_task = 1;
		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(evlist->core.threads, i);
			if (bpf_map_update_elem(fd, &pid, &val, BPF_ANY) < 0) {
				pr_err("Failed to update lock-contention task filter\n");
				return -1;
			}
		}
	}

	/* No explicit target: restrict recording to the forked workload. */
	if (target__none(target) && evlist->workload.pid > 0) {
		u32 pid = evlist->workload.pid;
		u8 val = 1;

		skel->bss->has_task = 1;
		fd = bpf_map__fd(skel->maps.task_filter);
		if (bpf_map_update_elem(fd, &pid, &val, BPF_ANY) < 0) {
			pr_err("Failed to update lock-contention task filter\n");
			return -1;
		}
	}

	if (lock_contention_bpf__attach(skel) < 0) {
		pr_err("Failed to attach lock-contention BPF skeleton\n");
		return -1;
	}

	return 0;
}
+40 −1
Original line number Diff line number Diff line
@@ -54,8 +54,47 @@ struct {
	__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

/*
 * Set of CPUs on which contention events may be recorded.
 *
 * Keyed by a 32-bit CPU number; can_record() only tests whether a key is
 * present, so the one-byte value is never read on the BPF side.
 * max_entries is 1 here; user space calls bpf_map__set_max_entries() on
 * this map before loading the skeleton.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} cpu_filter SEC(".maps");

/*
 * Set of tasks for which contention events may be recorded.
 *
 * Keyed by a 32-bit id taken from the low 32 bits of
 * bpf_get_current_pid_tgid(); can_record() only tests whether a key is
 * present, so the one-byte value is never read on the BPF side.
 * max_entries is 1 here; user space calls bpf_map__set_max_entries() on
 * this map before loading the skeleton.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u8));
	__uint(max_entries, 1);
} task_filter SEC(".maps");

/*
 * control flags
 *
 * has_cpu and has_task are written by user space through the skeleton's
 * bss section.  NOTE(review): 'enabled' is presumably toggled the same
 * way by the start/stop helpers - confirm.
 */
/* non-zero while recording is active; contention_begin() checks it first */
int enabled;
/* non-zero when can_record() must find the current CPU in cpu_filter */
int has_cpu;
/* non-zero when can_record() must find the current task in task_filter */
int has_task;

static inline int can_record(void)
{
	if (has_cpu) {
		__u32 cpu = bpf_get_smp_processor_id();
		__u8 *ok;

		ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
		if (!ok)
			return 0;
	}

	if (has_task) {
		__u8 *ok;
		__u32 pid = bpf_get_current_pid_tgid();

		ok = bpf_map_lookup_elem(&task_filter, &pid);
		if (!ok)
			return 0;
	}

	return 1;
}

SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
@@ -63,7 +102,7 @@ int contention_begin(u64 *ctx)
	struct task_struct *curr;
	struct tstamp_data *pelem;

	if (!enabled)
	if (!enabled || !can_record())
		return 0;

	curr = bpf_get_current_task_btf();
+9 −2
Original line number Diff line number Diff line
@@ -103,11 +103,13 @@ struct thread_stat {
#define LCB_F_PERCPU	(1U << 4)
#define LCB_F_MUTEX	(1U << 5)

struct evlist;
struct machine;
struct target;

#ifdef HAVE_BPF_SKEL

int lock_contention_prepare(void);
int lock_contention_prepare(struct evlist *evlist, struct target *target);
int lock_contention_start(void);
int lock_contention_stop(void);
int lock_contention_read(struct machine *machine, struct hlist_head *head);
@@ -115,7 +117,12 @@ int lock_contention_finish(void);

#else  /* !HAVE_BPF_SKEL */

static inline int lock_contention_prepare(void) { return 0; }
/*
 * Fallback for builds without HAVE_BPF_SKEL: there is nothing to set up,
 * so both arguments are ignored and success (0) is reported.
 */
static inline int
lock_contention_prepare(struct evlist *evlist __maybe_unused,
			struct target *target __maybe_unused)
{
	return 0;
}

static inline int lock_contention_start(void) { return 0; }
static inline int lock_contention_stop(void) { return 0; }
static inline int lock_contention_finish(void) { return 0; }