Commit ea15483e authored by German Gomez's avatar German Gomez Committed by Arnaldo Carvalho de Melo
Browse files

perf report: Add 'simd' sort field



Add 'simd' sort field to visualize SIMD ops in 'perf report'.

Rows are labeled with the SIMD ISA, and the type of predicate (if any):

  - [p] partial predicate
  - [e] empty predicate (no elements in the vector being used)

Example with Arm SPE and SVE (Scalable Vector Extension):

  #include <arm_sve.h>

  double src[1025], dst[1025];

  int main(void) {
    svfloat64_t vc = svdup_f64(1);
    for(;;)
      for(int i = 0; i < 1025; i += svcntd())
      {
        svbool_t pg = svwhilelt_b64(i, 1025);
        svfloat64_t vsrc = svld1(pg, &src[i]);
        svfloat64_t vdst = svadd_x(pg, vsrc, vc);
        svst1(pg, &dst[i], vdst);
      }
    return 0;
  }

  ... compiled using "gcc-11 -march=armv8-a+sve -O3"

Profiling on a platform that implements FEAT_SVE and FEAT_SPEv1p1:

  $ perf record -e arm_spe_0// -- ./a.out
  $ perf report --itrace=i1i -s overhead,pid,simd,sym

  Overhead      Pid:Command   Simd     Symbol
  ........  ................  .......  ......................

    53.76%    10758:program            [.] main
    46.14%    10758:program   [.] SVE  [.] main
     0.09%    10758:program   [p] SVE  [.] main

The report shows 0.09% of the sampled SVE operations use partial
predicates due to src and dst arrays not being multiples of the vector
register lengths.

Signed-off-by: default avatarGerman Gomez <german.gomez@arm.com>
Acked-by: default avatarIan Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman.Khandual@arm.com
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230320151509.1137462-2-james.clark@arm.com


Signed-off-by: default avatarJames Clark <james.clark@arm.com>
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 03a6c16e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -117,6 +117,7 @@ OPTIONS
	- addr: (Full) virtual address of the sampled instruction
	- retire_lat: On X86, this reports pipeline stall of this instruction compared
	  to the previous instruction in cycles. And currently supported only on X86
	- simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate

	By default, comm, dso and symbol keys are used.
	(i.e. --sort comm,dso,symbol)
+1 −0
Original line number Diff line number Diff line
@@ -745,6 +745,7 @@ __hists__add_entry(struct hists *hists,
		.weight = sample->weight,
		.ins_lat = sample->ins_lat,
		.p_stage_cyc = sample->p_stage_cyc,
		.simd_flags = sample->simd_flags,
	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);

	if (!hists->has_callchains && he && he->callchain_size != 0)
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ enum hist_column {
	HISTC_ADDR_FROM,
	HISTC_ADDR_TO,
	HISTC_ADDR,
	HISTC_SIMD,
	HISTC_NR_COLS, /* Last entry */
};

+47 −0
Original line number Diff line number Diff line
@@ -139,6 +139,52 @@ struct sort_entry sort_thread = {
	.se_width_idx	= HISTC_THREAD,
};

/* --sort simd */

static int64_t
sort__simd_cmp(struct hist_entry *left, struct hist_entry *right)
{
	if (left->simd_flags.arch != right->simd_flags.arch)
		return (int64_t) left->simd_flags.arch - right->simd_flags.arch;

	return (int64_t) left->simd_flags.pred - right->simd_flags.pred;
}

static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
{
	u64 arch = simd_flags->arch;

	if (arch & SIMD_OP_FLAGS_ARCH_SVE)
		return "SVE";
	else
		return "n/a";
}

static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
				     size_t size, unsigned int width __maybe_unused)
{
	const char *name;

	if (!he->simd_flags.arch)
		return repsep_snprintf(bf, size, "");

	name = hist_entry__get_simd_name(&he->simd_flags);

	if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
		return repsep_snprintf(bf, size, "[e] %s", name);
	else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
		return repsep_snprintf(bf, size, "[p] %s", name);

	return repsep_snprintf(bf, size, "[.] %s", name);
}

struct sort_entry sort_simd = {
	.se_header	= "Simd   ",
	.se_cmp		= sort__simd_cmp,
	.se_snprintf	= hist_entry__simd_snprintf,
	.se_width_idx	= HISTC_SIMD,
};

/* --sort comm */

/*
@@ -2142,6 +2188,7 @@ static struct sort_dimension common_sort_dimensions[] = {
	DIM(SORT_ADDR, "addr", sort_addr),
	DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
	DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
	DIM(SORT_SIMD, "simd", sort_simd)
};

#undef DIM
+2 −0
Original line number Diff line number Diff line
@@ -111,6 +111,7 @@ struct hist_entry {
	u64			p_stage_cyc;
	u8			cpumode;
	u8			depth;
	struct simd_flags	simd_flags;

	/* We are added by hists__add_dummy_entry. */
	bool			dummy;
@@ -241,6 +242,7 @@ enum sort_type {
	SORT_ADDR,
	SORT_LOCAL_RETIRE_LAT,
	SORT_GLOBAL_RETIRE_LAT,
	SORT_SIMD,

	/* branch stack specific sort keys */
	__SORT_BRANCH_STACK,