Commit 6ea5d1a3 authored by Kan Liang, committed by Arnaldo Carvalho de Melo
Browse files

perf script: Support instruction latency



The instruction latency information can be recorded on
some platforms, e.g., the Intel Sapphire Rapids server. With both memory
latency (weight) and the new instruction latency information, users can
easily locate the expensive load instructions, and also understand the time
spent in different stages. The users can optimize their applications in
different pipeline stages.

Add a new field "ins_lat" to filter the instruction latency information,
which is available with sample type PERF_SAMPLE_WEIGHT_STRUCT.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Link: https://lore.kernel.org/r/1632929894-102778-2-git-send-email-kan.liang@linux.intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 57d7ecfd
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ OPTIONS
        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
        brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr,
        metric, misc, srccode, ipc, data_page_size, code_page_size.
        metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat.
        Field list can be prepended with the type, trace, sw or hw,
        to indicate to which event type the field list applies.
        e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
+12 −2
Original line number Diff line number Diff line
@@ -122,6 +122,7 @@ enum perf_output_field {
	PERF_OUTPUT_TOD             = 1ULL << 32,
	PERF_OUTPUT_DATA_PAGE_SIZE  = 1ULL << 33,
	PERF_OUTPUT_CODE_PAGE_SIZE  = 1ULL << 34,
	PERF_OUTPUT_INS_LAT         = 1ULL << 35,
};

struct perf_script {
@@ -188,6 +189,7 @@ struct output_option {
	{.str = "tod", .field = PERF_OUTPUT_TOD},
	{.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
	{.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
	{.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT},
};

enum {
@@ -262,7 +264,8 @@ static struct {
			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
			      PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
			      PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
			      PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE,
			      PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE |
			      PERF_OUTPUT_INS_LAT,

		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
	},
@@ -522,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
	    evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE))
		return -EINVAL;

	if (PRINT_FIELD(INS_LAT) &&
	    evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT))
		return -EINVAL;

	return 0;
}

@@ -2039,6 +2046,9 @@ static void process_event(struct perf_script *script,
	if (PRINT_FIELD(WEIGHT))
		fprintf(fp, "%16" PRIu64, sample->weight);

	if (PRINT_FIELD(INS_LAT))
		fprintf(fp, "%16" PRIu16, sample->ins_lat);

	if (PRINT_FIELD(IP)) {
		struct callchain_cursor *cursor = NULL;

@@ -3715,7 +3725,7 @@ int cmd_script(int argc, const char **argv)
		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
		     "data_page_size,code_page_size",
		     "data_page_size,code_page_size,ins_lat",
		     parse_output_fields),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),