Commit 3a36281a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-tools-for-v5.12-2020-02-19' of...

Merge tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool updates from Arnaldo Carvalho de Melo:
 "New features:

   - Support instruction latency in 'perf report', with both memory
     latency (weight) and instruction latency information, users can
     locate expensive load instructions and understand time spent in
     different stages.

   - Extend 'perf c2c' to display the number of loads which were blocked
     by data or address conflict.

   - Add 'perf stat' support for L2 topdown events in systems such as
     Intel's Sapphire rapids server.

   - Add support for PERF_SAMPLE_CODE_PAGE_SIZE in various tools, as a
     sort key, for instance:

        perf report --stdio --sort=comm,symbol,code_page_size

   - New 'perf daemon' command to run long running sessions while
     providing a way to control the enablement of events without
     restarting a traditional 'perf record' session.

   - Enable counting events for BPF programs in 'perf stat' just like
     for other targets (tid, cgroup, cpu, etc), e.g.:

        # perf stat -e ref-cycles,cycles -b 254 -I 1000
           1.487903822            115,200      ref-cycles
           1.487903822             86,012      cycles
           2.489147029             80,560      ref-cycles
           2.489147029             73,784      cycles
        ^C

     The example above counts 'cycles' and 'ref-cycles' of BPF program
     of id 254. It is similar to bpftool-prog-profile command, but more
     flexible.

   - Support the new layout for PERF_RECORD_MMAP2 to carry the DSO
     build-id using infrastructure generalised from the eBPF subsystem,
     removing the need for traversing the perf.data file to collect
     build-ids at the end of 'perf record' sessions and helping with
     long running sessions where binaries can get replaced in updates,
     leading to possible mis-resolution of symbols.

   - Support filtering by hex address in 'perf script'.

   - Support DSO filter in 'perf script', like in other perf tools.

   - Add namespaces support to 'perf inject'

   - Add support for SDT (DTrace-style markers) events on ARM64.

  perf record:

   - Fix handling of eventfd() when draining a buffer in 'perf record'.

   - Improvements to the generation of metadata events for pre-existing
     threads (mmaps, comm, etc), speeding up the work done at the start
     of system wide or per CPU 'perf record' sessions.

  Hardware tracing:

   - Initial support for tracing KVM with Intel PT.

   - Intel PT fixes for IPC

   - Support Intel PT PSB (synchronization packets) events.

   - Automatically group aux-output events to overcome --filter syntax.

   - Enable PERF_SAMPLE_DATA_SRC on ARM's SPE.

   - Update ARM's CoreSight hardware tracing OpenCSD library to v1.0.0.

  perf annotate TUI:

   - Fix handling of 'k' ("show line number") hotkey

   - Fix jump parsing for C++ code.

  perf probe:

   - Add protection to avoid endless loop.

  cgroups:

   - Avoid reading cgroup mountpoint multiple times, caching it.

   - Fix handling of cgroup v1/v2 in mixed hierarchy.

  Symbol resolving:

   - Add OCaml symbol demangling.

   - Further fixes for handling PE executables when using perf with Wine
     and .exe/.dll files.

   - Fix 'perf unwind' DSO handling.

   - Resolve symbols against debug file first, to deal with artifacts
     related to LTO.

   - Fix gap between kernel end and module start on powerpc.

  Reporting tools:

   - The DSO filter shouldn't show samples in unresolved maps.

   - Improve debuginfod support in various tools.

  build ids:

   - Fix 16-byte build ids in 'perf buildid-cache', add a 'perf test'
     entry for that case.

  perf test:

   - Support for PERF_SAMPLE_WEIGHT_STRUCT.

   - Add test case for PERF_SAMPLE_CODE_PAGE_SIZE.

   - Shell based tests for 'perf daemon's commands ('start', 'stop',
     'reconfig', 'list', etc).

   - ARM cs-etm 'perf test' fixes.

   - Add parse-metric memory bandwidth testcase.

  Compiler related:

   - Fix 'perf probe' kretprobe issue caused by gcc 11 bug when used
     with -fpatchable-function-entry.

   - Fix ARM64 build with gcc 11's -Wformat-overflow.

   - Fix unaligned access in sample parsing test.

   - Fix printf conversion specifier for IP addresses on arm64, s390 and
     powerpc.

  Arch specific:

   - Support exposing Performance Monitor Counter SPRs as part of
     extended regs on powerpc.

   - Add JSON 'perf stat' metrics for ARM64's imx8mp, imx8mq and imx8mn
     DDR, fix imx8mm ones.

   - Fix common and uarch events for ARM64's A76 and Ampere eMag"

* tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (148 commits)
  perf buildid-cache: Don't skip 16-byte build-ids
  perf buildid-cache: Add test for 16-byte build-id
  perf symbol: Remove redundant libbfd checks
  perf test: Output the sub testing result in cs-etm
  perf test: Suppress logs in cs-etm testing
  perf tools: Fix arm64 build error with gcc-11
  perf intel-pt: Add documentation for tracing virtual machines
  perf intel-pt: Split VM-Entry and VM-Exit branches
  perf intel-pt: Adjust sample flags for VM-Exit
  perf intel-pt: Allow for a guest kernel address filter
  perf intel-pt: Support decoding of guest kernel
  perf machine: Factor out machine__idle_thread()
  perf machine: Factor out machines__find_guest()
  perf intel-pt: Amend decoder to track the NR flag
  perf intel-pt: Retain the last PIP packet payload as is
  perf intel_pt: Add vmlaunch and vmresume as branches
  perf script: Add branch types for VM-Entry and VM-Exit
  perf auxtrace: Automatically group aux-output events
  perf test: Fix unaligned access in sample parsing test
  perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing
  ...
parents 7c70f3a7 3027ce36
Loading
Loading
Loading
Loading
+22 −6
Original line number Diff line number Diff line
@@ -55,17 +55,33 @@ enum perf_event_powerpc_regs {
	PERF_REG_POWERPC_MMCR3,
	PERF_REG_POWERPC_SIER2,
	PERF_REG_POWERPC_SIER3,
	PERF_REG_POWERPC_PMC1,
	PERF_REG_POWERPC_PMC2,
	PERF_REG_POWERPC_PMC3,
	PERF_REG_POWERPC_PMC4,
	PERF_REG_POWERPC_PMC5,
	PERF_REG_POWERPC_PMC6,
	/* Max regs without the extended regs */
	PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};

#define PERF_REG_PMU_MASK	((1ULL << PERF_REG_POWERPC_MAX) - 1)

/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
#define PERF_REG_PMU_MASK_31   (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
#define	PERF_EXCLUDE_REG_EXT_300	(7ULL << PERF_REG_POWERPC_MMCR3)

#define PERF_REG_MAX_ISA_300   (PERF_REG_POWERPC_MMCR2 + 1)
#define PERF_REG_MAX_ISA_31    (PERF_REG_POWERPC_SIER3 + 1)
/*
 * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
 * includes 9 SPRs from MMCR0 to PMC6 excluding the
 * unsupported SPRs in PERF_EXCLUDE_REG_EXT_300.
 */
#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300)

/*
 * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
 * includes 12 SPRs from MMCR0 to PMC6.
 */
#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)

#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
+2 −0
Original line number Diff line number Diff line
@@ -146,6 +146,8 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
		     /boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))

bootstrap: $(BPFTOOL_BOOTSTRAP)

ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
ifeq ($(feature-clang-bpf-co-re),1)

+3 −1
Original line number Diff line number Diff line
@@ -99,7 +99,9 @@ FEATURE_TESTS_EXTRA := \
         clang                          \
         libbpf                         \
         libpfm4                        \
         libdebuginfod
         libdebuginfod			\
         clang-bpf-co-re


FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)

+2 −2
Original line number Diff line number Diff line
@@ -4,9 +4,9 @@
/*
 * Check OpenCSD library version is sufficient to provide required features
 */
#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0))
#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
#error "OpenCSD >= 0.14.0 is required"
#error "OpenCSD >= 1.0.0 is required"
#endif

int main(void)
+87 −9
Original line number Diff line number Diff line
@@ -145,12 +145,14 @@ enum perf_event_sample_format {
	PERF_SAMPLE_CGROUP			= 1U << 21,
	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
	PERF_SAMPLE_CODE_PAGE_SIZE		= 1U << 23,
	PERF_SAMPLE_WEIGHT_STRUCT		= 1U << 24,

	PERF_SAMPLE_MAX = 1U << 24,		/* non-ABI */
	PERF_SAMPLE_MAX = 1U << 25,		/* non-ABI */

	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
};

#define PERF_SAMPLE_WEIGHT_TYPE	(PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
/*
 * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
 *
@@ -386,7 +388,8 @@ struct perf_event_attr {
				aux_output     :  1, /* generate AUX records instead of events */
				cgroup         :  1, /* include cgroup events */
				text_poke      :  1, /* include text poke events */
				__reserved_1   : 30;
				build_id       :  1, /* use build id in mmap2 events */
				__reserved_1   : 29;

	union {
		__u32		wakeup_events;	  /* wakeup every n events */
@@ -659,6 +662,22 @@ struct perf_event_mmap_page {
	__u64	aux_size;
};

/*
 * The current state of perf_event_header::misc bits usage:
 * ('|' used bit, '-' unused bit)
 *
 *  012         CDEF
 *  |||---------||||
 *
 *  Where:
 *    0-2     CPUMODE_MASK
 *
 *    C       PROC_MAP_PARSE_TIMEOUT
 *    D       MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
 *    E       MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
 *    F       (reserved)
 */

#define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
#define PERF_RECORD_MISC_KERNEL			(1 << 0)
@@ -690,6 +709,7 @@ struct perf_event_mmap_page {
 *
 *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
 *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
 *   PERF_RECORD_MISC_MMAP_BUILD_ID      - PERF_RECORD_MMAP2 event
 *
 *
 * PERF_RECORD_MISC_EXACT_IP:
@@ -699,9 +719,13 @@ struct perf_event_mmap_page {
 *
 * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
 *   Indicates that thread was preempted in TASK_RUNNING state.
 *
 * PERF_RECORD_MISC_MMAP_BUILD_ID:
 *   Indicates that mmap2 event carries build id data.
 */
#define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT	(1 << 14)
#define PERF_RECORD_MISC_MMAP_BUILD_ID		(1 << 14)
/*
 * Reserve the last bit to indicate some extended misc field
 */
@@ -890,7 +914,24 @@ enum perf_event_type {
	 * 	  char			data[size];
	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
	 *
	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
	 *	{ union perf_sample_weight
	 *	 {
	 *		u64		full; && PERF_SAMPLE_WEIGHT
	 *	#if defined(__LITTLE_ENDIAN_BITFIELD)
	 *		struct {
	 *			u32	var1_dw;
	 *			u16	var2_w;
	 *			u16	var3_w;
	 *		} && PERF_SAMPLE_WEIGHT_STRUCT
	 *	#elif defined(__BIG_ENDIAN_BITFIELD)
	 *		struct {
	 *			u16	var3_w;
	 *			u16	var2_w;
	 *			u32	var1_dw;
	 *		} && PERF_SAMPLE_WEIGHT_STRUCT
	 *	#endif
	 *	 }
	 *	}
	 *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
	 *	{ u64			abi; # enum perf_sample_regs_abi
@@ -915,10 +956,20 @@ enum perf_event_type {
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	union {
	 *		struct {
	 *			u32		maj;
	 *			u32		min;
	 *			u64		ino;
	 *			u64		ino_generation;
	 *		};
	 *		struct {
	 *			u8		build_id_size;
	 *			u8		__reserved_1;
	 *			u16		__reserved_2;
	 *			u8		build_id[20];
	 *		};
	 *	};
	 *	u32				prot, flags;
	 *	char				filename[];
	 * 	struct sample_id		sample_id;
@@ -1127,14 +1178,16 @@ union perf_mem_data_src {
			mem_lvl_num:4,	/* memory hierarchy level number */
			mem_remote:1,   /* remote */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_rsvd:24;
			mem_blk:3,	/* access blocked */
			mem_rsvd:21;
	};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
	__u64 val;
	struct {
		__u64	mem_rsvd:24,
		__u64	mem_rsvd:21,
			mem_blk:3,	/* access blocked */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_remote:1,   /* remote */
			mem_lvl_num:4,	/* memory hierarchy level number */
@@ -1217,6 +1270,12 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT	26

/* Access blocked */
#define PERF_MEM_BLK_NA		0x01 /* not available */
#define PERF_MEM_BLK_DATA	0x02 /* data could not be forwarded */
#define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
#define PERF_MEM_BLK_SHIFT	40

#define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

@@ -1248,4 +1307,23 @@ struct perf_branch_entry {
		reserved:40;
};

/*
 * Two views of the 64-bit sample weight value.
 *
 * 'full' exposes the value as a single __u64. The anonymous struct
 * exposes the same storage as one 32-bit field (var1_dw) followed by
 * two 16-bit fields (var2_w, var3_w).
 *
 * The member order is reversed between the __LITTLE_ENDIAN_BITFIELD and
 * __BIG_ENDIAN_BITFIELD variants. NOTE(review): presumably this keeps
 * var1_dw on the low 32 bits of 'full' and var3_w on the top 16 bits
 * for both byte orders -- this assumes the bitfield-endianness macro
 * matches the byte order of the target; confirm.
 *
 * What the three sub-fields mean is not defined here.
 */
union perf_sample_weight {
	__u64		full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	struct {
		__u32	var1_dw;
		__u16	var2_w;
		__u16	var3_w;
	};
#elif defined(__BIG_ENDIAN_BITFIELD)
	struct {
		__u16	var3_w;
		__u16	var2_w;
		__u32	var1_dw;
	};
#else
/* Refuse to build rather than guess a layout. */
#error "Unknown endianness"
#endif
};

#endif /* _UAPI_LINUX_PERF_EVENT_H */
Loading