Commit 2a4b5166 authored by Ian Rogers, committed by Arnaldo Carvalho de Melo
Browse files

perf bench: Add event synthesis benchmark



Event synthesis may occur at the start or end (tail) of a perf command.
In system-wide mode it can scan every process in /proc, which may add
seconds of latency before event recording. Add a new benchmark that
times how long event synthesis takes with and without data synthesis.

An example execution looks like:

 $ perf bench internals synthesize
 # Running 'internals/synthesize' benchmark:
 Average synthesis took: 168.253800 usec
 Average data synthesis took: 208.104700 usec

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200402154357.107873-2-irogers@google.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 1a2725f3
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -61,6 +61,9 @@ SUBSYSTEM
'epoll'::
	Eventpoll (epoll) stressing benchmarks.

'internals'::
	Benchmark internal perf functionality.

'all'::
	All benchmark subsystems.

@@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.

SUITES FOR 'internals'
~~~~~~~~~~~~~~~~~~~~~~
*synthesize*::
Suite for evaluating perf's event synthesis performance.

SEE ALSO
--------
linkperf:perf[1]
+1 −1
Original line number Diff line number Diff line
@@ -6,9 +6,9 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o

perf-y += epoll-wait.o
perf-y += epoll-ctl.o
perf-y += synthesize.o

perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+1 −1
Original line number Diff line number Diff line
@@ -41,9 +41,9 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);

int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
int bench_synthesize(int argc, const char **argv);

#define BENCH_FORMAT_DEFAULT_STR	"default"
#define BENCH_FORMAT_DEFAULT		0
+101 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Benchmark the synthesis of perf events, such as happens at the start of
 * a 'perf record'. Synthesis is performed on the current process, invoking
 * the 'dummy' event handlers, which support dump_trace but otherwise do
 * nothing.
 *
 * Copyright 2019 Google LLC.
 */
#include <stdio.h>
#include "bench.h"
#include "../util/debug.h"
#include "../util/session.h"
#include "../util/synthetic-events.h"
#include "../util/target.h"
#include "../util/thread_map.h"
#include "../util/tool.h"
#include <linux/err.h>
#include <linux/time64.h>
#include <subcmd/parse-options.h>

/*
 * Number of synthesis rounds each measurement is averaged over.
 * Defaults to 10000 and can be overridden with -i/--iterations.
 */
static unsigned int iterations = 10000;

/* Command-line options understood by this benchmark. */
static const struct option options[] = {
	OPT_UINTEGER('i', "iterations", &iterations,
		"Number of iterations used to compute average"),
	OPT_END()
};

/* Usage strings handed to parse_options(); the list is NULL-terminated. */
static const char *const usage[] = {
	"perf bench internals synthesize <options>",
	NULL
};


static int do_synthesize(struct perf_session *session,
			struct perf_thread_map *threads,
			struct target *target, bool data_mmap)
{
	const unsigned int nr_threads_synthesize = 1;
	struct timeval start, end, diff;
	u64 runtime_us;
	unsigned int i;
	double average;
	int err;

	gettimeofday(&start, NULL);
	for (i = 0; i < iterations; i++) {
		err = machine__synthesize_threads(&session->machines.host,
						target, threads, data_mmap,
						nr_threads_synthesize);
		if (err)
			return err;
	}

	gettimeofday(&end, NULL);
	timersub(&end, &start, &diff);
	runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
	average = (double)runtime_us/(double)iterations;
	printf("Average %ssynthesis took: %f usec\n",
		data_mmap ? "data " : "", average);
	return 0;
}

/*
 * Entry point of 'perf bench internals synthesize'.
 *
 * Parses the command-line options, creates a session and a thread map for
 * the current process, then runs the synthesis benchmark twice: first with
 * data_mmap disabled and then with it enabled.
 *
 * Returns 0 on success, the error encoded in the session pointer if the
 * session cannot be created, -ENOMEM if the thread map cannot be created,
 * or the error reported by do_synthesize().
 */
int bench_synthesize(int argc, const char **argv)
{
	struct perf_session *session;
	struct target target = {
		.pid = "self",
	};
	struct perf_thread_map *threads;
	int err;

	argc = parse_options(argc, argv, options, usage, 0);

	session = perf_session__new(NULL, false, NULL);
	if (IS_ERR(session)) {
		pr_err("Session creation failed.\n");
		return PTR_ERR(session);
	}

	threads = thread_map__new_by_pid(getpid());
	if (!threads) {
		pr_err("Thread map creation failed.\n");
		err = -ENOMEM;
		goto out_delete_session;
	}

	/*
	 * No perf_tool is set up here: the session is created without one and
	 * do_synthesize() does not take one.
	 */
	err = do_synthesize(session, threads, &target, false);
	if (!err)
		err = do_synthesize(session, threads, &target, true);

	perf_thread_map__put(threads);
out_delete_session:
	perf_session__delete(session);
	return err;
}
+6 −0
Original line number Diff line number Diff line
@@ -76,6 +76,11 @@ static struct bench epoll_benchmarks[] = {
};
#endif // HAVE_EVENTFD

/*
 * Benchmarks belonging to the 'internals' collection.
 * NOTE(review): the all-NULL entry presumably terminates the table, as in
 * the other benchmark tables of this file - confirm against the table walker.
 */
static struct bench internals_benchmarks[] = {
	{ "synthesize", "Benchmark perf event synthesis",	bench_synthesize	},
	{ NULL,		NULL,					NULL			}
};

struct collection {
	const char	*name;
	const char	*summary;
@@ -92,6 +97,7 @@ static struct collection collections[] = {
#ifdef HAVE_EVENTFD
	{"epoll",       "Epoll stressing benchmarks",                   epoll_benchmarks        },
#endif
	{ "internals",	"Perf-internals benchmarks",			internals_benchmarks	},
	{ "all",	"All benchmarks",				NULL			},
	{ NULL,		NULL,						NULL			}
};