Commit d7d213e0 authored by Kan Liang, committed by Arnaldo Carvalho de Melo
Browse files

perf report: Support Retire Latency



The Retire Latency field is added in var3_w of
PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports the pipeline stall of
this instruction compared to the previous instruction, in cycles. It is
quite useful to display this information with perf mem report.

The p_stage_cyc for Power is also from the var3_w. Union the p_stage_cyc
and retire_lat to share the code.

Implement x86-specific code to display the x86-specific header.

Add a new sort key retire_lat for the Retire Latency.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20230104201349.1451191-8-kan.liang@linux.intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent ebab2916
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -115,6 +115,8 @@ OPTIONS
	- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
	  pipeline stage. And currently supported only on powerpc.
	- addr: (Full) virtual address of the sampled instruction
	- retire_lat: On X86, this reports the pipeline stall of this instruction compared
	  to the previous instruction, in cycles. Currently supported only on X86.

	By default, comm, dso and symbol keys are used.
	(i.e. --sort comm,dso,symbol)
+20 −0
Original line number Diff line number Diff line
@@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
	else {
		data->weight = weight.var1_dw;
		data->ins_lat = weight.var2_w;
		data->retire_lat = weight.var3_w;
	}
}

@@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
		*array |= ((u64)data->ins_lat << 32);
	}
}

/*
 * Translate a sort-entry column header into its "Retire Latency" label.
 *
 * "Local Pipeline Stage Cycle" becomes "Local Retire Latency" and
 * "Pipeline Stage Cycle" becomes "Retire Latency". Any other header is
 * handed back untouched (the very pointer that was passed in).
 */
const char *arch_perf_header_entry(const char *se_header)
{
	const char *label = se_header;

	if (strcmp(se_header, "Local Pipeline Stage Cycle") == 0)
		label = "Local Retire Latency";
	else if (strcmp(se_header, "Pipeline Stage Cycle") == 0)
		label = "Retire Latency";

	return label;
}

/*
 * Tell whether @sort_key is one of the keys this check accepts.
 *
 * Returns 1 when @sort_key is exactly "p_stage_cyc" or
 * "local_p_stage_cyc", and 0 for any other string.
 */
int arch_support_sort_key(const char *sort_key)
{
	return strcmp(sort_key, "p_stage_cyc") == 0 ||
	       strcmp(sort_key, "local_p_stage_cyc") == 0;
}
+4 −1
Original line number Diff line number Diff line
@@ -92,7 +92,10 @@ struct perf_sample {
	u8  cpumode;
	u16 misc;
	u16 ins_lat;
	union {
		u16 p_stage_cyc;
		u16 retire_lat;
	};
	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
	char insn[MAX_INSN];
	void *raw_data;
+2 −0
Original line number Diff line number Diff line
@@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = {
	DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
	DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
	DIM(SORT_ADDR, "addr", sort_addr),
	DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
	DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
};

#undef DIM
+2 −0
Original line number Diff line number Diff line
@@ -237,6 +237,8 @@ enum sort_type {
	SORT_LOCAL_PIPELINE_STAGE_CYC,
	SORT_GLOBAL_PIPELINE_STAGE_CYC,
	SORT_ADDR,
	SORT_LOCAL_RETIRE_LAT,
	SORT_GLOBAL_RETIRE_LAT,

	/* branch stack specific sort keys */
	__SORT_BRANCH_STACK,