Commit 20aa3970 authored by Adrian Hunter, committed by Arnaldo Carvalho de Melo
Browse files

perf intel-pt: Fix premature IPC



The code assumed a change in cycle count means accurate IPC. That is not
correct, for example when sampling both branches and instructions, or at
a FUP packet address (a FUP packet is not CYC-eligible). Fix by using an
explicit flag to indicate when IPC can be sampled.

Fixes: 5b1dc0fd ("perf intel-pt: Add support for samples to contain IPC ratio")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/20210205175350.23817-3-adrian.hunter@intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 03fb0f85
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -2814,9 +2814,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
		}
		if (intel_pt_sample_time(decoder->pkt_state)) {
			intel_pt_update_sample_time(decoder);
			if (decoder->sample_cyc)
			if (decoder->sample_cyc) {
				decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
				decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
				decoder->sample_cyc = false;
			}
		}
		/*
		 * When using only TSC/MTC to compute cycles, IPC can be
		 * sampled as soon as the cycle count changes.
		 */
		if (!decoder->have_cyc)
			decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
	}

	decoder->state.timestamp = decoder->sample_timestamp;
+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#define INTEL_PT_ABORT_TX	(1 << 1)
#define INTEL_PT_ASYNC		(1 << 2)
#define INTEL_PT_FUP_IP		(1 << 3)
#define INTEL_PT_SAMPLE_IPC	(1 << 4)

enum intel_pt_sample_type {
	INTEL_PT_BRANCH		= 1 << 0,
+6 −10
Original line number Diff line number Diff line
@@ -1381,6 +1381,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
@@ -1431,6 +1432,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
	else
		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
@@ -1983,14 +1985,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)

	ptq->have_sample = false;

	if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
		/*
		 * Cycle count and instruction count only go together to create
		 * a valid IPC ratio when the cycle count changes.
		 */
	ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
	ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
	}

	/*
	 * Do PEBS first to allow for the possibility that the PEBS timestamp