Unverified Commit 211c4ef8 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!12311 [OLK-5.10] AMD Genoa and Turin Perf patches backport for OLK-5.10

Merge Pull Request from: @PvsNarasimha 
 
perf support:
sample type PERF_SAMPLE_DATA_PAGE_SIZE support
perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT
perf record ibs: Warn about sampling period skew
HYBRID_TOPOLOGY feature
Record non-CPU PMU capabilities
perf script ibs: Support new IBS bits in raw trace dump
perf tool support for IBS DataSrc and L3 miss filtering
"ref-cycles" event support for Zen 4 and later microarchitectures

Install perf and perftest tool to verify patched kernel.
```javascript
#cd /kernel/tools/perf/
#sudo make perf
#cp perf /usr/bin/
```

*Run Test cases*
-----------------------------
* perf/x86/amd/core: Define a proper ref-cycles event for Zen 4 and later
* perf/x86/amd/core: Update and fix stalled-cycles-* events for Zen 2 and later
* x86: Fix comment for X86_FEATURE_ZEN

```javascript
perf list | grep ref-cycles

ref-cycles                                         [Hardware event]
ref-cycles OR cpu/ref-cycles/                      [Kernel PMU event]
```
```javascript
perf stat -e "{ref-cycles, cpu/ref-cycles/}" sleep 5

Performance counter stats for 'sleep 5':
  371,296      ref-cycles:u
  
  371,296       cpu/ref-cycles/u

  5.002200577 seconds time elapsed
  
  0.000000000 seconds user
  
  0.001862000 seconds sys
```
-------------------------------------------------------------------
* perf script ibs: Support new IBS bits in raw trace dump
* perf tool ibs: Sync AMD IBS header file
* perf report: Add support to print a textual representation of IBS raw sample data
* perf evlist: Use the right prefix for 'struct evlist' sample id lookup methods
* perf evlist: Use the right prefix for 'struct evlist' sample parsing methods
* perf evlist: Use the right prefix for 'struct evlist' raw samples methods
* perf report: Add tools/arch/x86/include/asm/amd-ibs.h

```javascript
$sudo perf record -c 130 -a -e ibs_op/l3missonly=1/ --raw-samples
$sudo perf report -D
```
```
...

ibs_op_ctl:     0000004500070008 MaxCnt       128 L3MissOnly 1 En 1

Val 1 CntCtl 0=cycles CurCnt        69
ibs_op_data:    0000000000710002 CompToRetCtr     2 TagToRetCtr   113

BrnRet 0  RipInvalid 0 BrnFuse 0 Microcode 0
ibs_op_data2:   0000000000000002 CacheHitSt 0=M-state RmtNode 0

DataSrc 2=A peer cache in a near CCX
ibs_op_data3:   000000681d1700a1 LdOp 1 StOp 0 DcL1TlbMiss 0

DcL2TlbMiss 0 DcL1TlbHit2M 0 DcL1TlbHit1G 1 DcL2TlbHit2M 0

DcMiss 1 DcMisAcc 0 DcWcMemAcc 0 DcUcMemAcc 0 DcLockedOp 0

DcMissNoMabAlloc 1 DcLinAddrValid 1 DcPhyAddrValid 1

DcL2TlbHit1G 0 L2Miss 1 SwPf 0 OpMemWidth  8 bytes

OpDcMissOpenMemReqs  7 DcMissLat   104 TlbRefillLat     0
```

*IBS Fetch PMU ex:*
```javascript
$sudo perf record -c 130 -a -e ibs_fetch/l3missonly=1/ --raw-samples
sudo ./perf report -D

...
ibs_fetch_ctl:  3c1f00c700080008 MaxCnt     128 Cnt     128 Lat   199
  
En 1 Val 1 Comp 1 IcMiss 1 PhyAddrValid        1 L1TlbPgSz 4KB
  
L1TlbMiss 0 L2TlbMiss 0 RandEn 0 L2Miss 1 L3MissOnly 1
  
FetchOcMiss 1 FetchL3Miss 1
```
----------------------------------------------------------------------------------
* perf header: Record non-CPU PMU capabilities
* perf inject: Keep some features sections from input file
* libperf: Add preadn()
* perf header: Add ability to keep feature sections

  ```javascript
  $ perf inject -i perf.data-from-desktop -o injected-perf.data

  $ perf script --header-only -i injected-perf.data | head -15
  # ========
  # captured on    : Fri May 20 15:08:54 2022
  # header version : 1
  # data offset    : 1208
  # data size      : 837480
  # feat offset    : 838688
  # hostname : Desktop
  # os release : 5.13.0-41-generic
  # perf version : 5.18.rc5.gac837f7ca7ed
  # arch : x86_64
  # nrcpus online : 28
  # nrcpus avail : 28
  # cpudesc : Intel(R) Core(TM) i9-9940X CPU @ 3.30GHz
  # cpuid : GenuineIntel,6,85,4
  # total memory : 65548656 kB
```
--------------------------------------------------------------------------------------
* perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT
* perf script: Fix PERF_SAMPLE_WEIGHT_STRUCT support
* perf record: Add support for PERF_SAMPLE_CODE_PAGE_SIZE

```javascript
sudo perf record --code-page-size sleep 1

Error:
Asking for the code page size isn't supported by this kernel.
```
-----------------------------------------------------------------------------------------
* perf evsel: Emit warning about kernel not supporting the data page size sample_type bit
* perf record: Support new sample type for data page size

```javascript
sudo perf record --data-page-size sleep 1

Error:
Asking for the data page size isn't supported by this kernel.
```
-----------------------------------------------------------------------------------------
* perf/x86/amd: Do not WARN() on every IRQ
* perf/x86/amd/core: Avoid register reset when CPU is dead

```javascript
git clone https://github.com/deater/perf_event_tests.git

[sudo sysctl kernel.perf_event_paranoid=2]

./fuzzer/perf_fuzzer -t OC -s 100
....
....
Close:  5/5 Successful
Read:   0/0 Successful
Write:  0/0 Successful
Ioctl:  0/0 Successful:

```

--------------------------------------------------------------------------------
* perf header: Store PMU caps in an array of strings
* perf header: Pass "cpu" pmu name while printing caps
* perf header: Support HYBRID_CPU_PMU_CAPS feature
* perf header: Support HYBRID_TOPOLOGY feature
* perf pmu: Add hybrid helper functions
* perf pmu: Save detected hybrid pmus to a global pmu list
* perf env: Conditionally compile BPF support code on having HAVE_LIBBPF_SUPPORT
* perf pmu: Parse pmu caps sysfs only once

```javascript
sudo ./perf record -c 10000 -C 0 -e ibs_fetch/l3missonly=1/ sleep 5
 
WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled
and tagged operation does not cause L3 Miss. This causes sampling period skew.
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.369 MB perf.data ]
[amd@volcano9f8e-oshost malathi]$ sudo ./perf report --header-only -I

...
...
search HYBRID_TOPO
```

 
 
Link:https://gitee.com/openeuler/kernel/pulls/12311

 

Reviewed-by: Liu Chao <liuchao173@huawei.com>
Reviewed-by: Wenkuan Wang <wenkuan.wang@amd.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
parents 5c3dff2d d5b41d4c
Loading
Loading
Loading
Loading
+41 −5
Original line number Diff line number Diff line
@@ -251,7 +251,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
 * AMD Performance Monitor Family 17h and later:
 */
static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -263,10 +263,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x0187,
};

/*
 * Generic perf hardware event index -> AMD event encoding table, one of
 * the per-generation tables of this shape in this file.
 *
 * Only the listed PERF_COUNT_HW_* indices carry an encoding; every index
 * that is not listed is zero-initialized by the designated-initializer
 * rules.
 */
static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00a9,
};

/*
 * Generic perf hardware event index -> AMD event encoding table that also
 * provides an encoding for PERF_COUNT_HW_REF_CPU_CYCLES.
 *
 * Indices that are not listed are zero-initialized by the
 * designated-initializer rules.
 */
static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00a9,
	/* This encoding does not fit in 32 bits, so the table must stay u64. */
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x100000120,
};

static u64 amd_pmu_event_map(int hw_event)
{
	if (boot_cpu_data.x86 >= 0x17)
		return amd_f17h_perfmon_event_map[hw_event];
	if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
		return amd_zen4_perfmon_event_map[hw_event];

	if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
		return amd_zen2_perfmon_event_map[hw_event];

	if (cpu_feature_enabled(X86_FEATURE_ZEN1))
		return amd_zen1_perfmon_event_map[hw_event];

	return amd_perfmon_event_map[hw_event];
}
@@ -653,7 +682,6 @@ static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	amd_pmu_cpu_reset(cpu);

	if (!x86_pmu.amd_nb_constraints)
		return;
@@ -927,7 +955,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
	struct hw_perf_event *hwc;
	struct perf_event *event;
	int handled = 0, idx;
	u64 status, mask;
	u64 reserved, status, mask;
	bool pmu_enabled;

	/*
@@ -946,6 +974,14 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
	if (!status)
		goto done;

	reserved = status & ~amd_pmu_global_cntr_mask;
	if (reserved)
		pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n",
			     reserved);

	/* Clear any reserved bits set by buggy microcode */
	status &= amd_pmu_global_cntr_mask;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;
+136 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * From PPR Vol 1 for AMD Family 19h Model 01h B1
 * 55898 Rev 0.35 - Feb 5, 2021
 */

#include "msr-index.h"

/*
 * IBS Hardware MSRs
 */

/*
 * MSR 0xc0011030: IBS Fetch Control
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with named bit-fields whose widths add up to 64 bits. The bit positions
 * given per field match the declaration order only when the compiler
 * allocates bit-fields starting from the least significant bit.
 */
union ibs_fetch_ctl {
	__u64 val;
	struct {
		__u64	fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
			fetch_cnt:16,	/* 16-31: instruction fetch count */
			fetch_lat:16,	/* 32-47: instruction fetch latency */
			fetch_en:1,	/* 48: instruction fetch enable */
			fetch_val:1,	/* 49: instruction fetch valid */
			fetch_comp:1,	/* 50: instruction fetch complete */
			ic_miss:1,	/* 51: i-cache miss */
			phy_addr_valid:1,/* 52: physical address valid */
			l1tlb_pgsz:2,	/* 53-54: i-cache L1TLB page size
					 *	  (needs IbsPhyAddrValid) */
			l1tlb_miss:1,	/* 55: i-cache fetch missed in L1TLB */
			l2tlb_miss:1,	/* 56: i-cache fetch missed in L2TLB */
			rand_en:1,	/* 57: random tagging enable */
			fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
					 *      (needs IbsFetchComp) */
			l3_miss_only:1,	/* 59: Collect L3 miss samples only */
			fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */
			fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */
			reserved:2;	/* 62-63: reserved */
	};
};

/*
 * MSR 0xc0011033: IBS Execution Control
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with named bit-fields whose widths add up to 64 bits. The periodic op
 * maximum count is split across two fields: 'opmaxcnt' and, per its
 * comment, the upper 7 bits in 'opmaxcnt_ext'.
 */
union ibs_op_ctl {
	__u64 val;
	struct {
		__u64	opmaxcnt:16,	/* 0-15: periodic op max. count */
			l3_miss_only:1,	/* 16: Collect L3 miss samples only */
			op_en:1,	/* 17: op sampling enable */
			op_val:1,	/* 18: op sample valid */
			cnt_ctl:1,	/* 19: periodic op counter control */
			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
			reserved0:5,	/* 27-31: reserved */
			opcurcnt:27,	/* 32-58: periodic op counter current count */
			reserved1:5;	/* 59-63: reserved */
	};
};

/*
 * MSR 0xc0011035: IBS Op Data 1
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with named bit-fields whose widths add up to 64 bits.
 */
union ibs_op_data {
	__u64 val;
	struct {
		__u64	comp_to_ret_ctr:16,	/* 0-15: op completion to retire count */
			tag_to_ret_ctr:16,	/* 16-31: op tag to retire count */
			reserved1:2,		/* 32-33: reserved */
			op_return:1,		/* 34: return op */
			op_brn_taken:1,		/* 35: taken branch op */
			op_brn_misp:1,		/* 36: mispredicted branch op */
			op_brn_ret:1,		/* 37: branch op retired */
			op_rip_invalid:1,	/* 38: RIP is invalid */
			op_brn_fuse:1,		/* 39: fused branch op */
			op_microcode:1,		/* 40: microcode op */
			reserved2:23;		/* 41-63: reserved */
	};
};

/*
 * MSR 0xc0011036: IBS Op Data 2
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with named bit-fields whose widths add up to 64 bits. The data source is
 * stored in two non-adjacent fields, 'data_src_lo' and 'data_src_hi'.
 * NOTE(review): presumably consumers combine them as (hi << 3) | lo -
 * confirm against the PPR before relying on it.
 */
union ibs_op_data2 {
	__u64 val;
	struct {
		__u64	data_src_lo:3,	/* 0-2: data source low */
			reserved0:1,	/* 3: reserved */
			rmt_node:1,	/* 4: destination node */
			cache_hit_st:1,	/* 5: cache hit state */
			data_src_hi:2,	/* 6-7: data source high */
			reserved1:56;	/* 8-63: reserved */
	};
};

/*
 * MSR 0xc0011037: IBS Op Data 3
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with named bit-fields whose widths add up to 64 bits.
 */
union ibs_op_data3 {
	__u64 val;
	struct {
		__u64	ld_op:1,			/* 0: load op */
			st_op:1,			/* 1: store op */
			dc_l1tlb_miss:1,		/* 2: data cache L1TLB miss */
			dc_l2tlb_miss:1,		/* 3: data cache L2TLB miss */
			dc_l1tlb_hit_2m:1,		/* 4: data cache L1TLB hit in 2M page */
			dc_l1tlb_hit_1g:1,		/* 5: data cache L1TLB hit in 1G page */
			dc_l2tlb_hit_2m:1,		/* 6: data cache L2TLB hit in 2M page */
			dc_miss:1,			/* 7: data cache miss */
			dc_mis_acc:1,			/* 8: misaligned access */
			reserved:4,			/* 9-12: reserved */
			dc_wc_mem_acc:1,		/* 13: write combining memory access */
			dc_uc_mem_acc:1,		/* 14: uncacheable memory access */
			dc_locked_op:1,			/* 15: locked operation */
			dc_miss_no_mab_alloc:1,		/* 16: DC miss with no MAB allocated */
			dc_lin_addr_valid:1,		/* 17: data cache linear address valid */
			dc_phy_addr_valid:1,		/* 18: data cache physical address valid */
			dc_l2_tlb_hit_1g:1,		/* 19: data cache L2TLB hit in 1G page */
			l2_miss:1,			/* 20: L2 cache miss */
			sw_pf:1,			/* 21: software prefetch */
			op_mem_width:4,			/* 22-25: load/store size in bytes */
			op_dc_miss_open_mem_reqs:6,	/* 26-31: outstanding mem reqs on DC fill */
			dc_miss_lat:16,			/* 32-47: data cache miss latency */
			tlb_refill_lat:16;		/* 48-63: L1 TLB refill latency */
	};
};

/*
 * MSR 0xc001103c: IBS Fetch Control Extended
 *
 * 'val' is the raw 64-bit register value; the anonymous struct overlays it
 * with one 16-bit latency field, and the remaining 48 bits are reserved.
 */
union ic_ibs_extd_ctl {
	__u64 val;
	struct {
		__u64	itlb_refill_lat:16,	/* 0-15: ITLB Refill latency for sampled fetch */
			reserved:48;		/* 16-63: reserved */
	};
};

/*
 * IBS driver related
 */

/*
 * Container for IBS sample data: a 32-bit 'size', then a union in which the
 * zero-length 'data' array and 'caps' start at the same offset, then 'regs'
 * with room for MSR_AMD64_IBS_REG_COUNT_MAX 64-bit register values.
 *
 * NOTE(review): 'size' presumably counts the valid bytes starting at 'data'
 * - confirm against the IBS driver that fills this structure.
 */
struct perf_ibs_data {
	u32		size;
	union {
		/* Zero-length array: occupies no storage, only marks the offset. */
		u32	data[0];	/* data buffer starts here */
		u32	caps;
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
+2 −0
Original line number Diff line number Diff line
@@ -9,4 +9,6 @@ extern unsigned int page_size;
ssize_t readn(int fd, void *buf, size_t n);
ssize_t writen(int fd, const void *buf, size_t n);

ssize_t preadn(int fd, void *buf, size_t n, off_t offs);

#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
+20 −0
Original line number Diff line number Diff line
@@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n)
	return ion(true, fd, buf, n);
}

/*
 * Read exactly 'n' bytes from 'fd' into 'buf', starting at file offset
 * 'offs', retrying pread() calls that were interrupted by a signal.
 *
 * Returns 'n' once everything has been read. If pread() reports
 * end-of-file (0) or an error (negative, errno set) before that, its
 * return value is passed back unchanged, even when some bytes had
 * already been stored in 'buf'.
 */
ssize_t preadn(int fd, void *buf, size_t n, off_t offs)
{
	char *dst = buf;
	size_t done = 0;

	while (done < n) {
		ssize_t got = pread(fd, dst + done, n - done,
				    offs + (off_t)done);

		if (got > 0) {
			done += got;
			continue;
		}

		/* An interrupted call transferred nothing: issue it again. */
		if (got < 0 && errno == EINTR)
			continue;

		return got;
	}

	return n;
}

/*
 * Write exactly 'n' bytes or return an error.
 */
+6 −0
Original line number Diff line number Diff line
@@ -293,6 +293,12 @@ OPTIONS
--phys-data::
	Record the sample physical addresses.

--data-page-size::
	Record the page size of the sampled data address.

--code-page-size::
	Record the page size of the sampled code address (ip).

-T::
--timestamp::
	Record the sample timestamps. Use it with 'perf report -D' to see the
Loading