Commit b30d7a77 authored by Linus Torvalds

Merge tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next

Pull perf tools updates from Namhyung Kim:
 "Internal cleanup:

   - Refactor PMU data management to handle hybrid systems in a generic
     way.

     Do more work in the lexer so that legacy event types parse more
     easily. A side effect of this is that, when a PMU is specified,
     scanning sysfs is avoided, improving start-up time.

   - Fix hybrid metrics, so that, for example, TopdownL1 works for both
     performance and efficiency cores on Intel machines. To support
     this, events are sorted and regrouped after parsing.

   - Add reference count checking for the 'thread' data structure.

   - Lots of fixes for memory leaks in various places, thanks to ASAN
     and Ian's refcount checker.

   - Reduce the binary size by replacing static variables with local or
     dynamically allocated memory.

   - Introduce 'sharded_mutex' for annotate data to reduce memory
     footprint (a minimal sketch of the idea follows this list).

   - Make filesystem access library functions more thread safe.
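
     A sharded mutex hashes an object's address to one of a small,
     fixed pool of mutexes, so per-object locking no longer requires a
     mutex embedded in every object. The sketch below illustrates the
     idea only; the names and the hash are illustrative, not the
     actual perf sharded_mutex API:

       #include <pthread.h>
       #include <stdint.h>

       #define LOCK_SHARDS 64	/* must be a power of two */

       static pthread_mutex_t shards[LOCK_SHARDS] = {
               [0 ... LOCK_SHARDS - 1] = PTHREAD_MUTEX_INITIALIZER,
       };

       /* Pick a shard from the object's address; drop the low bits
        * since allocations are aligned. */
       static pthread_mutex_t *shard_for(const void *obj)
       {
               uintptr_t h = (uintptr_t)obj >> 4;

               return &shards[h & (LOCK_SHARDS - 1)];
       }

       void object_lock(const void *obj)
       {
               pthread_mutex_lock(shard_for(obj));
       }

       void object_unlock(const void *obj)
       {
               pthread_mutex_unlock(shard_for(obj));
       }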

  Test:

   - Organize cpu_map tests into a single suite.

   - Add a metric value validation test to check that metric values
     fall within their expected ranges.

   - Add perf stat stdio output test to check if event and metric names
     match.

   - Add perf data converter JSON output test.

   - Fix a lot of issues reported by shellcheck(1), in preparation for
     enabling shellcheck by default.

   - Make the large x86 new instructions test optional at build time
     using EXTRA_TESTS=1.

   - Add a test for libpfm4 events.

  perf script:

   - Add 'dsoff' output field to display the offset from the DSO.

      $ perf script -F comm,pid,event,ip,dsoff
         ls 2695501 cycles:      152cc73ef4b5 (/usr/lib/x86_64-linux-gnu/ld-2.31.so+0x1c4b5)
         ls 2695501 cycles:  ffffffff99045b3e ([kernel.kallsyms])
         ls 2695501 cycles:  ffffffff9968e107 ([kernel.kallsyms])
         ls 2695501 cycles:  ffffffffc1f54afb ([kernel.kallsyms])
         ls 2695501 cycles:  ffffffff9968382f ([kernel.kallsyms])
         ls 2695501 cycles:  ffffffff99e00094 ([kernel.kallsyms])
         ls 2695501 cycles:      152cc718a8d0 (/usr/lib/x86_64-linux-gnu/libselinux.so.1+0x68d0)
         ls 2695501 cycles:  ffffffff992a6db0 ([kernel.kallsyms])

   - Adjust width for large PID/TID values.

  perf report:

   - Make reading addr2line output for srcline more robust by checking
     for sentinel output before the actual data and by using a timeout
     of 1 second.

   - Allow config terms (like 'name=ABC') with breakpoint events.

      $ perf record -e mem:0x55feb98dd169:x/name=breakpoint/ -p 19646 -- sleep 1

  perf annotate:

   - Handle x86 instruction suffixes (like the 'l' in 'movl')
     generically.

   - Parse instruction operands properly even when they contain
     whitespace. This is needed to handle llvm-objdump output.

   - Support RISC-V binutils lookup using the triplet prefixes.

   - Add '<' and '>' keys to navigate to prev/next symbols in the TUI.

   - Fix instruction association and parsing for LoongArch.

  perf stat:

   - Add --per-cache aggregation option, optionally specify a cache
     level like `--per-cache=L2`.

      $ sudo perf stat --per-cache -a -e ls_dmnd_fills_from_sys.ext_cache_remote --\
        taskset -c 0-15,64-79,128-143,192-207\
        perf bench sched messaging -p -t -l 100000 -g 8

        # Running 'sched/messaging' benchmark:
        # 20 sender and receiver threads per group
        # 8 groups == 320 threads run

        Total time: 7.648 [sec]

        Performance counter stats for 'system wide':

        S0-D0-L3-ID0             16         17,145,912      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID8             16         14,977,628      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID16            16            262,539      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID24            16              3,140      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID32            16             27,403      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID40            16             17,026      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID48            16              7,292      ls_dmnd_fills_from_sys.ext_cache_remote
        S0-D0-L3-ID56            16              2,464      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID64            16         22,489,306      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID72            16         21,455,257      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID80            16             11,619      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID88            16             30,978      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID96            16             37,628      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID104           16             13,594      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID112           16             10,164      ls_dmnd_fills_from_sys.ext_cache_remote
        S1-D1-L3-ID120           16             11,259      ls_dmnd_fills_from_sys.ext_cache_remote

              7.779171484 seconds time elapsed

   - Change the default output (when no event or metric is specified)
     so that individual events are hidden and the metric and its group
     appear.

       Performance counter stats for 'ls /':

                    1.85 msec task-clock                       #    0.594 CPUs utilized
                       0      context-switches                 #    0.000 /sec
                       0      cpu-migrations                   #    0.000 /sec
                      97      page-faults                      #   52.517 K/sec
               2,187,173      cycles                           #    1.184 GHz
               2,474,459      instructions                     #    1.13  insn per cycle
                 531,584      branches                         #  287.805 M/sec
                  13,626      branch-misses                    #    2.56% of all branches
                              TopdownL1                 #     23.5 %  tma_backend_bound
                                                        #     11.5 %  tma_bad_speculation
                                                        #     39.1 %  tma_frontend_bound
                                                        #     25.9 %  tma_retiring

   - Allow the --cputype option to accept any PMU name (not just
     hybrid PMUs).

   - Fix the output values so they are not accumulated when perf stat
     runs multiple times with the -r option.

  perf list:

   - Show the metric group description from a JSON file called
     metricgroups.json.

   - Allow the 'pfm' argument to list only libpfm4 events, and check
     that each event is supported before showing it.

  JSON vendor events:

   - Avoid event grouping using "NO_GROUP_EVENTS" constraints. The
     topdown events are correctly grouped even if no group exists.

   - Add "Default" metric group to print it in the default output. And
     use "DefaultMetricgroupName" to indicate the real metric group
     name.

   - Add AmpereOne core PMU events.

  Misc:

   - Define man page date correctly.

   - Track exception level properly on ARM CoreSight ETM.

   - Allow anonymous struct, union or enum when retrieving type names
     from DWARF.

   - Fix incorrect filename when calling `perf inject --jit`.

   - Handle PLT size correctly on LoongArch"

* tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next: (269 commits)
  perf test: Skip metrics w/o event name in stat STD output linter
  perf test: Reorder event name checks in stat STD output linter
  perf pmu: Remove a hard coded cpu PMU assumption
  perf pmus: Add notion of default PMU for JSON events
  perf unwind: Fix map reference counts
  perf test: Set PERF_EXEC_PATH for script execution
  perf script: Initialize buffer for regs_map()
  perf tests: Fix test_arm_callgraph_fp variable expansion
  perf symbol: Add LoongArch case in get_plt_sizes()
  perf test: Remove x permission from lib/stat_output.sh
  perf test: Rerun failed metrics with longer workload
  perf test: Add skip list for metrics known would fail
  perf test: Add metric value validation test
  perf jit: Fix incorrect file name in DWARF line table
  perf annotate: Fix instruction association and parsing for LoongArch
  perf annotation: Switch lock from a mutex to a sharded_mutex
  perf sharded_mutex: Introduce sharded_mutex
  tools: Fix incorrect calculation of object size by sizeof
  perf subcmd: Fix missing check for return value of malloc() in add_cmdname()
  perf parse-events: Remove unneeded semicolon
  ...
parents d2a6fd45 4d60e83d
tools/lib/api/fs/cgroup.c: +11 −6
@@ -14,7 +14,7 @@ struct cgroupfs_cache_entry {
};

/* just cache last used one */
-static struct cgroupfs_cache_entry cached;
+static struct cgroupfs_cache_entry *cached;

int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
{
@@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
	char *p, *path;
	char mountpoint[PATH_MAX];

-	if (!strcmp(cached.subsys, subsys)) {
-		if (strlen(cached.mountpoint) < maxlen) {
-			strcpy(buf, cached.mountpoint);
+	if (cached && !strcmp(cached->subsys, subsys)) {
+		if (strlen(cached->mountpoint) < maxlen) {
+			strcpy(buf, cached->mountpoint);
			return 0;
		}
		return -1;
@@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
	free(line);
	fclose(fp);

-	strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1);
-	strcpy(cached.mountpoint, mountpoint);
+	if (!cached)
+		cached = calloc(1, sizeof(*cached));
+
+	if (cached) {
+		strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1);
+		strcpy(cached->mountpoint, mountpoint);
+	}

	if (mountpoint[0] && strlen(mountpoint) < maxlen) {
		strcpy(buf, mountpoint);
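
The cgroup.c change above shows the series' binary-size trick: a cache that
used to live in .bss as a static struct becomes a pointer that is calloc'd on
first use. A minimal sketch of the same pattern (illustrative names, not the
perf code):

	#include <stdlib.h>
	#include <string.h>

	struct cache { char key[64]; char val[256]; };

	/* Was "static struct cache cached;" -- paid for in .bss even if
	 * never used. Now allocated lazily on the first store. */
	static struct cache *cached;

	static void cache_store(const char *key, const char *val)
	{
		if (!cached)
			cached = calloc(1, sizeof(*cached));
		if (!cached)
			return;	/* allocation failed: skip caching */

		strncpy(cached->key, key, sizeof(cached->key) - 1);
		strncpy(cached->val, val, sizeof(cached->val) - 1);
	}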
tools/lib/api/fs/fs.c: +99 −127
// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
@@ -10,6 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <pthread.h>
#include <unistd.h>
#include <sys/mount.h>

@@ -43,7 +45,7 @@
#define BPF_FS_MAGIC           0xcafe4a11
#endif

-static const char * const sysfs__fs_known_mountpoints[] = {
+static const char * const sysfs__known_mountpoints[] = {
	"/sys",
	0,
};
@@ -86,87 +88,89 @@ static const char * const bpf_fs__known_mountpoints[] = {
};

struct fs {
-	const char		*name;
-	const char * const	*mounts;
-	char			 path[PATH_MAX];
-	bool			 found;
-	bool			 checked;
-	long			 magic;
-};
-
-enum {
-	FS__SYSFS   = 0,
-	FS__PROCFS  = 1,
-	FS__DEBUGFS = 2,
-	FS__TRACEFS = 3,
-	FS__HUGETLBFS = 4,
-	FS__BPF_FS = 5,
+	const char *		 const name;
+	const char * const *	 const mounts;
+	char			*path;
+	pthread_mutex_t		 mount_mutex;
+	const long		 magic;
};

#ifndef TRACEFS_MAGIC
#define TRACEFS_MAGIC 0x74726163
#endif

-static struct fs fs__entries[] = {
-	[FS__SYSFS] = {
-		.name	= "sysfs",
-		.mounts	= sysfs__fs_known_mountpoints,
-		.magic	= SYSFS_MAGIC,
-		.checked = false,
-	},
-	[FS__PROCFS] = {
-		.name	= "proc",
-		.mounts	= procfs__known_mountpoints,
-		.magic	= PROC_SUPER_MAGIC,
-		.checked = false,
-	},
-	[FS__DEBUGFS] = {
-		.name	= "debugfs",
-		.mounts	= debugfs__known_mountpoints,
-		.magic	= DEBUGFS_MAGIC,
-		.checked = false,
-	},
-	[FS__TRACEFS] = {
-		.name	= "tracefs",
-		.mounts	= tracefs__known_mountpoints,
-		.magic	= TRACEFS_MAGIC,
-		.checked = false,
-	},
-	[FS__HUGETLBFS] = {
-		.name	= "hugetlbfs",
-		.mounts = hugetlbfs__known_mountpoints,
-		.magic	= HUGETLBFS_MAGIC,
-		.checked = false,
-	},
-	[FS__BPF_FS] = {
-		.name	= "bpf",
-		.mounts = bpf_fs__known_mountpoints,
-		.magic	= BPF_FS_MAGIC,
-		.checked = false,
-	},
-};
+static void fs__init_once(struct fs *fs);
+static const char *fs__mountpoint(const struct fs *fs);
+static const char *fs__mount(struct fs *fs);
+
+#define FS(lower_name, fs_name, upper_name)		\
+static struct fs fs__##lower_name = {			\
+	.name = #fs_name,				\
+	.mounts = lower_name##__known_mountpoints,	\
+	.magic = upper_name##_MAGIC,			\
+	.mount_mutex = PTHREAD_MUTEX_INITIALIZER,	\
+};							\
+							\
+static void lower_name##_init_once(void)		\
+{							\
+	struct fs *fs = &fs__##lower_name;		\
+							\
+	fs__init_once(fs);				\
+}							\
+							\
+const char *lower_name##__mountpoint(void)		\
+{							\
+	static pthread_once_t init_once = PTHREAD_ONCE_INIT;	\
+	struct fs *fs = &fs__##lower_name;		\
+							\
+	pthread_once(&init_once, lower_name##_init_once);	\
+	return fs__mountpoint(fs);			\
+}							\
+							\
+const char *lower_name##__mount(void)			\
+{							\
+	const char *mountpoint = lower_name##__mountpoint();	\
+	struct fs *fs = &fs__##lower_name;		\
+							\
+	if (mountpoint)					\
+		return mountpoint;			\
+							\
+	return fs__mount(fs);				\
+}							\
+							\
+bool lower_name##__configured(void)			\
+{							\
+	return lower_name##__mountpoint() != NULL;	\
+}
+
+FS(sysfs, sysfs, SYSFS);
+FS(procfs, procfs, PROC_SUPER);
+FS(debugfs, debugfs, DEBUGFS);
+FS(tracefs, tracefs, TRACEFS);
+FS(hugetlbfs, hugetlbfs, HUGETLBFS);
+FS(bpf_fs, bpf, BPF_FS);

static bool fs__read_mounts(struct fs *fs)
{
-	bool found = false;
	char type[100];
	FILE *fp;
+	char path[PATH_MAX + 1];

	fp = fopen("/proc/mounts", "r");
	if (fp == NULL)
-		return NULL;
+		return false;

-	while (!found &&
-	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
-		      fs->path, type) == 2) {
+	while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      path, type) == 2) {

-		if (strcmp(type, fs->name) == 0)
-			found = true;
+		if (strcmp(type, fs->name) == 0) {
+			fs->path = strdup(path);
+			fclose(fp);
+			return fs->path != NULL;
+		}
	}

	fclose(fp);
-	fs->checked = true;
-	return fs->found = found;
+	return false;
}

static int fs__valid_mount(const char *fs, long magic)
@@ -188,8 +192,9 @@ static bool fs__check_mounts(struct fs *fs)
	ptr = fs->mounts;
	while (*ptr) {
		if (fs__valid_mount(*ptr, fs->magic) == 0) {
-			fs->found = true;
-			strcpy(fs->path, *ptr);
+			fs->path = strdup(*ptr);
+			if (!fs->path)
+				return false;
			return true;
		}
		ptr++;
@@ -227,43 +232,26 @@ static bool fs__env_override(struct fs *fs)
	if (!override_path)
		return false;

-	fs->found = true;
-	fs->checked = true;
-	strncpy(fs->path, override_path, sizeof(fs->path) - 1);
-	fs->path[sizeof(fs->path) - 1] = '\0';
+	fs->path = strdup(override_path);
+	if (!fs->path)
+		return false;
	return true;
}

-static const char *fs__get_mountpoint(struct fs *fs)
+static void fs__init_once(struct fs *fs)
{
-	if (fs__env_override(fs))
-		return fs->path;
-
-	if (fs__check_mounts(fs))
-		return fs->path;
-
-	if (fs__read_mounts(fs))
-		return fs->path;
-
-	return NULL;
+	if (!fs__env_override(fs) &&
+	    !fs__check_mounts(fs) &&
+	    !fs__read_mounts(fs)) {
+		assert(!fs->path);
+	} else {
+		assert(fs->path);
+	}
}

-static const char *fs__mountpoint(int idx)
+static const char *fs__mountpoint(const struct fs *fs)
{
-	struct fs *fs = &fs__entries[idx];
-
-	if (fs->found)
-		return (const char *)fs->path;
-
-	/* the mount point was already checked for the mount point
-	 * but and did not exist, so return NULL to avoid scanning again.
-	 * This makes the found and not found paths cost equivalent
-	 * in case of multiple calls.
-	 */
-	if (fs->checked)
-		return NULL;
-
-	return fs__get_mountpoint(fs);
+	return fs->path;
}

static const char *mount_overload(struct fs *fs)
@@ -278,45 +266,29 @@ static const char *mount_overload(struct fs *fs)
	return getenv(upper_name) ?: *fs->mounts;
}

-static const char *fs__mount(int idx)
+static const char *fs__mount(struct fs *fs)
{
-	struct fs *fs = &fs__entries[idx];
	const char *mountpoint;

-	if (fs__mountpoint(idx))
-		return (const char *)fs->path;
+	pthread_mutex_lock(&fs->mount_mutex);

-	mountpoint = mount_overload(fs);
+	/* Check if path found inside the mutex to avoid races with other callers of mount. */
+	mountpoint = fs__mountpoint(fs);
+	if (mountpoint)
+		goto out;

-	if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
-		return NULL;
+	mountpoint = mount_overload(fs);

-	return fs__check_mounts(fs) ? fs->path : NULL;
+	if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 &&
+	    fs__valid_mount(mountpoint, fs->magic) == 0) {
+		fs->path = strdup(mountpoint);
+		mountpoint = fs->path;
+	}

-#define FS(name, idx)				\
-const char *name##__mountpoint(void)		\
-{						\
-	return fs__mountpoint(idx);		\
-}						\
-						\
-const char *name##__mount(void)			\
-{						\
-	return fs__mount(idx);			\
-}						\
-						\
-bool name##__configured(void)			\
-{						\
-	return name##__mountpoint() != NULL;	\
+out:
+	pthread_mutex_unlock(&fs->mount_mutex);
+	return mountpoint;
}

-FS(sysfs,   FS__SYSFS);
-FS(procfs,  FS__PROCFS);
-FS(debugfs, FS__DEBUGFS);
-FS(tracefs, FS__TRACEFS);
-FS(hugetlbfs, FS__HUGETLBFS);
-FS(bpf_fs, FS__BPF_FS);

int filename__read_int(const char *filename, int *value)
{
	char line[64];
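
The FS() macro above gives each filesystem a pthread_once-guarded, one-time
mountpoint discovery plus a per-filesystem mutex for mounting. A stripped-down
sketch of the pthread_once pattern (illustrative names, stand-in discovery):

	#include <pthread.h>
	#include <string.h>

	static char *mountpoint;		/* written exactly once */
	static pthread_once_t once = PTHREAD_ONCE_INIT;

	static void mountpoint_init(void)
	{
		/* Runs at most once even with concurrent callers; a
		 * stand-in for the env/known-mounts/proc scan in fs.c. */
		mountpoint = strdup("/sys");
	}

	const char *mountpoint_get(void)
	{
		pthread_once(&once, mountpoint_init);
		return mountpoint;	/* NULL if discovery failed */
	}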
tools/lib/api/fs/tracing_path.c: +6 −11
@@ -13,17 +13,12 @@

#include "tracing_path.h"

-static char tracing_mnt[PATH_MAX]  = "/sys/kernel/debug";
static char tracing_path[PATH_MAX]        = "/sys/kernel/tracing";
-static char tracing_events_path[PATH_MAX] = "/sys/kernel/tracing/events";

static void __tracing_path_set(const char *tracing, const char *mountpoint)
{
-	snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
	snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
		 mountpoint, tracing);
-	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
-		 mountpoint, tracing, "events");
}

static const char *tracing_path_tracefs_mount(void)
@@ -149,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
			/* sdt markers */
			if (!strncmp(filename, "sdt_", 4)) {
				snprintf(buf, size,
					"Error:\tFile %s/%s not found.\n"
					"Error:\tFile %s/events/%s not found.\n"
					"Hint:\tSDT event cannot be directly recorded on.\n"
					"\tPlease first use 'perf probe %s:%s' before recording it.\n",
-					tracing_events_path, filename, sys, name);
+					tracing_path, filename, sys, name);
			} else {
				snprintf(buf, size,
					 "Error:\tFile %s/%s not found.\n"
					 "Error:\tFile %s/events/%s not found.\n"
					 "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
-					 tracing_events_path, filename);
+					 tracing_path, filename);
			}
			break;
		}
@@ -169,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
		break;
	case EACCES: {
		snprintf(buf, size,
			 "Error:\tNo permissions to read %s/%s\n"
			 "Error:\tNo permissions to read %s/events/%s\n"
			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
-			 tracing_events_path, filename, tracing_path_mount());
+			 tracing_path, filename, tracing_path_mount());
	}
		break;
	default:
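
With tracing_events_path gone, the events directory is composed at the point
of use from tracing_path, so two static buffers can no longer fall out of
sync. A sketch of the pattern (illustrative fragment; tracing_path and
filename as in the code above):

	char path[PATH_MAX];

	/* Compose "<tracing_path>/events/<filename>" on demand instead of
	 * maintaining a separate static tracing_events_path buffer. */
	snprintf(path, sizeof(path), "%s/events/%s", tracing_path, filename);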
tools/lib/api/io.h: +27 −1
@@ -8,6 +8,7 @@
#define __API_IO__

#include <errno.h>
+#include <poll.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -23,6 +24,8 @@ struct io {
	char *end;
	/* Currently accessed data pointer. */
	char *data;
+	/* Read timeout, 0 implies no timeout. */
+	int timeout_ms;
	/* Set true on when the end of file on read error. */
	bool eof;
};
@@ -35,6 +38,7 @@ static inline void io__init(struct io *io, int fd,
	io->buf = buf;
	io->end = buf;
	io->data = buf;
+	io->timeout_ms = 0;
	io->eof = false;
}

@@ -47,7 +51,29 @@ static inline int io__get_char(struct io *io)
		return -1;

	if (ptr == io->end) {
-		ssize_t n = read(io->fd, io->buf, io->buf_len);
+		ssize_t n;
+
+		if (io->timeout_ms != 0) {
+			struct pollfd pfds[] = {
+				{
+					.fd = io->fd,
+					.events = POLLIN,
+				},
+			};
+
+			n = poll(pfds, 1, io->timeout_ms);
+			if (n == 0)
+				errno = ETIMEDOUT;
+			if (n > 0 && !(pfds[0].revents & POLLIN)) {
+				errno = EIO;
+				n = -1;
+			}
+			if (n <= 0) {
+				io->eof = true;
+				return -1;
+			}
+		}
+		n = read(io->fd, io->buf, io->buf_len);

		if (n <= 0) {
			io->eof = true;
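
The new timeout_ms field is what lets perf abandon a wedged addr2line child
after one second: io__get_char() polls the fd before each buffer refill and
reports EOF on timeout. A hypothetical caller, assuming the usual
io__init(io, fd, buf, buf_len) signature:

	char buf[128];
	struct io io;
	int c;

	io__init(&io, fd, buf, sizeof(buf));
	io.timeout_ms = 1000;	/* give up if no data arrives within 1s */

	/* Consume one line; io.eof is set on timeout or error. */
	while ((c = io__get_char(&io)) != -1 && c != '\n')
		;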
tools/lib/perf/cpumap.c: +101 −24
@@ -99,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b)
	return cpu_a->cpu - cpu_b->cpu;
}

+static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+{
+	return RC_CHK_ACCESS(cpus)->map[idx];
+}
+
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
	size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
@@ -111,8 +116,12 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
		/* Remove dups */
		j = 0;
		for (i = 0; i < nr_cpus; i++) {
-			if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu)
-				RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu;
+			if (i == 0 ||
+			    __perf_cpu_map__cpu(cpus, i).cpu !=
+			    __perf_cpu_map__cpu(cpus, i - 1).cpu) {
+				RC_CHK_ACCESS(cpus)->map[j++].cpu =
+					__perf_cpu_map__cpu(cpus, i).cpu;
+			}
		}
		perf_cpu_map__set_nr(cpus, j);
		assert(j <= nr_cpus);
@@ -269,26 +278,31 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
	return cpus;
}

+static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+{
+	return RC_CHK_ACCESS(cpus)->nr;
+}
+
struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
	struct perf_cpu result = {
		.cpu = -1
	};

-	if (cpus && idx < RC_CHK_ACCESS(cpus)->nr)
-		return RC_CHK_ACCESS(cpus)->map[idx];
+	if (cpus && idx < __perf_cpu_map__nr(cpus))
+		return __perf_cpu_map__cpu(cpus, idx);

	return result;
}

int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
-	return cpus ? RC_CHK_ACCESS(cpus)->nr : 1;
+	return cpus ? __perf_cpu_map__nr(cpus) : 1;
}

bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
-	return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true;
+	return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}

int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
@@ -299,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
		return -1;

	low = 0;
-	high = RC_CHK_ACCESS(cpus)->nr;
+	high = __perf_cpu_map__nr(cpus);
	while (low < high) {
		int idx = (low + high) / 2;
-		struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx];
+		struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);

		if (cpu_at_idx.cpu == cpu.cpu)
			return idx;
@@ -321,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
	return perf_cpu_map__idx(cpus, cpu) != -1;
}

+bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
+{
+	int nr;
+
+	if (lhs == rhs)
+		return true;
+
+	if (!lhs || !rhs)
+		return false;
+
+	nr = __perf_cpu_map__nr(lhs);
+	if (nr != __perf_cpu_map__nr(rhs))
+		return false;
+
+	for (int idx = 0; idx < nr; idx++) {
+		if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
+			return false;
+	}
+	return true;
+}
+
+bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
+{
+	return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
	struct perf_cpu result = {
@@ -328,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
	};

	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
-	return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result;
+	return __perf_cpu_map__nr(map) > 0
+		? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
+		: result;
}

/** Is 'b' a subset of 'a'. */
@@ -336,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
{
	if (a == b || !b)
		return true;
-	if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr)
+	if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
		return false;

-	for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) {
-		if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu)
+	for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
+		if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
			return false;
-		if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) {
+		if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
			j++;
-			if (j == RC_CHK_ACCESS(b)->nr)
+			if (j == __perf_cpu_map__nr(b))
				return true;
		}
	}
@@ -374,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
		return perf_cpu_map__get(other);
	}

-	tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr;
+	tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
	if (!tmp_cpus)
		return NULL;

	/* Standard merge algorithm from wikipedia */
	i = j = k = 0;
-	while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) {
-		if (RC_CHK_ACCESS(orig)->map[i].cpu <= RC_CHK_ACCESS(other)->map[j].cpu) {
-			if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu)
+	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+		if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+			if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
				j++;
-			tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
+			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
		} else
-			tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
+			tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
	}

-	while (i < RC_CHK_ACCESS(orig)->nr)
-		tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
+	while (i < __perf_cpu_map__nr(orig))
+		tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);

-	while (j < RC_CHK_ACCESS(other)->nr)
-		tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
+	while (j < __perf_cpu_map__nr(other))
+		tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
	assert(k <= tmp_len);

	merged = cpu_map__trim_new(k, tmp_cpus);
@@ -402,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
	perf_cpu_map__put(orig);
	return merged;
}

+struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+					     struct perf_cpu_map *other)
+{
+	struct perf_cpu *tmp_cpus;
+	int tmp_len;
+	int i, j, k;
+	struct perf_cpu_map *merged = NULL;
+
+	if (perf_cpu_map__is_subset(other, orig))
+		return perf_cpu_map__get(orig);
+	if (perf_cpu_map__is_subset(orig, other))
+		return perf_cpu_map__get(other);
+
+	tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
+	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
+	if (!tmp_cpus)
+		return NULL;
+
+	i = j = k = 0;
+	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+		if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+			i++;
+		else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+			j++;
+		else {
+			j++;
+			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+		}
+	}
+	if (k)
+		merged = cpu_map__trim_new(k, tmp_cpus);
+	free(tmp_cpus);
+	return merged;
+}
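
A small usage sketch of the new intersect API (hypothetical driver code; the
maps are reference counted, so every map we created or were handed gets a
put):

	#include <perf/cpumap.h>
	#include <stdio.h>

	void show_common_cpus(void)
	{
		struct perf_cpu_map *a = perf_cpu_map__new("0-3");
		struct perf_cpu_map *b = perf_cpu_map__new("2-5");
		struct perf_cpu_map *both = perf_cpu_map__intersect(a, b);

		/* Expect CPUs 2 and 3. */
		for (int i = 0; both && i < perf_cpu_map__nr(both); i++)
			printf("cpu %d\n", perf_cpu_map__cpu(both, i).cpu);

		perf_cpu_map__put(a);
		perf_cpu_map__put(b);
		perf_cpu_map__put(both);
	}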