Commit 590db42d authored by Kan Liang, committed by Arnaldo Carvalho de Melo
Browse files

perf report: Support instruction latency



The instruction latency information can be recorded on some platforms,
e.g., the Intel Sapphire Rapids server. With both memory latency
(weight) and the new instruction latency information, users can easily
locate the expensive load instructions, and also understand the time
spent in different stages. The users can optimize their applications in
different pipeline stages.

The 'weight' field is shared among different architectures. Reusing the
'weight' field may impact other architectures. Add a new field to store
the instruction latency.

Like the 'weight' support, introduce an 'ins_lat' for the global
instruction latency, and a 'local_ins_lat' for the local instruction
latency version.

Add new sort functions, INSTR Latency and Local INSTR Latency,
accordingly.

Add local_ins_lat to the default_mem_sort_order[].

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/1612296553-21962-7-git-send-email-kan.liang@linux.intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent ea8d0ed6
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -109,6 +109,9 @@ OPTIONS
	- time: Separate the samples by time stamp with the resolution specified by
	--time-quantum (default 100ms). Specify with overhead and before it.
	- code_page_size: the code page size of sampled code address (ip)
	- ins_lat: Instruction latency in core cycles. This is the global instruction
	  latency
	- local_ins_lat: Local instruction latency version

	By default, comm, dso and symbol keys are used.
	(i.e. --sort comm,dso,symbol)
@@ -155,7 +158,8 @@ OPTIONS
	- blocked: reason of blocked load access for the data at the time of the sample

	And the default sort keys are changed to local_weight, mem, sym, dso,
	symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, see '--mem-mode'.
	symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat,
	see '--mem-mode'.

	If the data file has tracepoint event(s), following (dynamic) sort keys
	are also available:
+1 −0
Original line number Diff line number Diff line
@@ -142,6 +142,7 @@ struct perf_sample {
	u16 insn_len;
	u8  cpumode;
	u16 misc;
	u16 ins_lat;
	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
	char insn[MAX_INSN];
	void *raw_data;
+3 −1
Original line number Diff line number Diff line
@@ -2352,8 +2352,10 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
		weight.full = *array;
		if (type & PERF_SAMPLE_WEIGHT)
			data->weight = weight.full;
		else
		else {
			data->weight = weight.var1_dw;
			data->ins_lat = weight.var2_w;
		}
		array++;
	}

+9 −3
Original line number Diff line number Diff line
@@ -209,6 +209,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
	hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
	hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
	hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
	if (symbol_conf.nanosecs)
		hists__new_col_len(hists, HISTC_TIME, 16);
	else
@@ -287,12 +289,13 @@ static long hist_time(unsigned long htime)
}

static void he_stat__add_period(struct he_stat *he_stat, u64 period,
				u64 weight)
				u64 weight, u64 ins_lat)
{

	he_stat->period		+= period;
	he_stat->weight		+= weight;
	he_stat->nr_events	+= 1;
	he_stat->ins_lat	+= ins_lat;
}

static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -304,6 +307,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
	dest->period_guest_us	+= src->period_guest_us;
	dest->nr_events		+= src->nr_events;
	dest->weight		+= src->weight;
	dest->ins_lat		+= src->ins_lat;
}

static void he_stat__decay(struct he_stat *he_stat)
@@ -592,6 +596,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
	int64_t cmp;
	u64 period = entry->stat.period;
	u64 weight = entry->stat.weight;
	u64 ins_lat = entry->stat.ins_lat;
	bool leftmost = true;

	p = &hists->entries_in->rb_root.rb_node;
@@ -610,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,

		if (!cmp) {
			if (sample_self) {
				he_stat__add_period(&he->stat, period, weight);
				he_stat__add_period(&he->stat, period, weight, ins_lat);
				hist_entry__add_callchain_period(he, period);
			}
			if (symbol_conf.cumulate_callchain)
				he_stat__add_period(he->stat_acc, period, weight);
				he_stat__add_period(he->stat_acc, period, weight, ins_lat);

			/*
			 * This mem info was allocated from sample__resolve_mem
@@ -725,6 +730,7 @@ __hists__add_entry(struct hists *hists,
			.nr_events = 1,
			.period	= sample->period,
			.weight = sample->weight,
			.ins_lat = sample->ins_lat,
		},
		.parent = sym_parent,
		.filtered = symbol__parent_filter(sym_parent) | al->filtered,
+2 −0
Original line number Diff line number Diff line
@@ -73,6 +73,8 @@ enum hist_column {
	HISTC_DSO_SIZE,
	HISTC_SYMBOL_IPC,
	HISTC_MEM_BLOCKED,
	HISTC_LOCAL_INS_LAT,
	HISTC_GLOBAL_INS_LAT,
	HISTC_NR_COLS, /* Last entry */
};

Loading