Commit 7c1b16ba authored by Adrian Hunter, committed by Arnaldo Carvalho de Melo
Browse files

perf intel-pt: Add support for decoding FUP/TIP only



Use the new itrace 'q' option to add support for a mode of decoding that
ignores TNT, does not walk object code, but gets the ip from FUP and TIP
packets.

Example:

 $ perf record -e intel_pt//u grep -rI pudding drivers
 [ perf record: Woken up 52 times to write data ]
 [ perf record: Captured and wrote 57.870 MB perf.data ]
 $ time perf script --itrace=bi | wc -l
 58948289

 real    1m23.863s
 user    1m23.251s
 sys     0m7.452s
 $ time perf script --itrace=biq | wc -l
 3385694

 real    0m4.453s
 user    0m4.455s
 sys     0m0.328s

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20200710151104.15137-12-adrian.hunter@intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 51971536
Loading
Loading
Loading
Loading
+31 −0
Original line number Diff line number Diff line
@@ -825,6 +825,7 @@ The letters are:
	l	synthesize last branch entries (use with i or x)
	L	synthesize last branch entries on existing event records
	s	skip initial number of events
	q	quicker (less detailed) decoding

"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -969,6 +970,36 @@ at the beginning. This is useful to ignore initialization code.

skips the first million instructions.

The q option changes the way the trace is decoded.  The decoding is much faster
but much less detailed.  Specifically, with the q option, the decoder does not
decode TNT packets, and does not walk object code, but gets the ip from FUP and
TIP packets.  The q option can be used with the b and i options but the period
is not used.  The q option decodes more quickly, but is useful only if the
control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
TIP.PGD packets (see below).  However, the q option could be used to find time
ranges that could then be decoded fully using the --time option.

What will *not* be decoded with the (single) q option:

	- direct calls and jmps
	- conditional branches
	- non-branch instructions

What *will* be decoded with the (single) q option:

	- asynchronous branches such as interrupts
	- indirect branches
	- function return target address *if* the noretcomp config term (refer to the
	config terms section) was used
	- start of (control-flow) tracing
	- end of (control-flow) tracing, if it is not out of context
	- power events, ptwrite, transaction start and abort
	- instruction pointer associated with PSB packets

Note the q option does not specify what events will be synthesized e.g. the p
option must be used also to show power events.


dump option
~~~~~~~~~~~

+163 −4
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
	INTEL_PT_STATE_TIP_PGD,
	INTEL_PT_STATE_FUP,
	INTEL_PT_STATE_FUP_NO_TIP,
	INTEL_PT_STATE_RESAMPLE,
};

static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
	case INTEL_PT_STATE_ERR_RESYNC:
	case INTEL_PT_STATE_IN_SYNC:
	case INTEL_PT_STATE_TNT_CONT:
	case INTEL_PT_STATE_RESAMPLE:
		return true;
	case INTEL_PT_STATE_TNT:
	case INTEL_PT_STATE_TIP:
@@ -109,6 +111,8 @@ struct intel_pt_decoder {
	bool fixup_last_mtc;
	bool have_last_ip;
	bool in_psb;
	bool hop;
	bool hop_psb_fup;
	enum intel_pt_param_flags flags;
	uint64_t pos;
	uint64_t last_ip;
@@ -235,6 +239,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
	decoder->data               = params->data;
	decoder->return_compression = params->return_compression;
	decoder->branch_enable      = params->branch_enable;
	decoder->hop                = params->quick >= 1;

	decoder->flags              = params->flags;

@@ -275,6 +280,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);

	if (decoder->hop)
		intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");

	return decoder;
}

@@ -1730,8 +1738,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)

		case INTEL_PT_FUP:
			decoder->pge = true;
			if (decoder->packet.count)
			if (decoder->packet.count) {
				intel_pt_set_last_ip(decoder);
				if (decoder->hop) {
					/* Act on FUP at PSBEND */
					decoder->ip = decoder->last_ip;
					decoder->hop_psb_fup = true;
				}
			}
			break;

		case INTEL_PT_MODE_TSX:
@@ -1875,6 +1889,118 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
	}
}

/*
 * Handler for INTEL_PT_STATE_RESAMPLE: report the decoder's current ip as an
 * "instruction" sample (from_ip = ip, to_ip = 0) and put the packet state back
 * to INTEL_PT_STATE_IN_SYNC.
 *
 * Always returns 0.
 */
static int intel_pt_resample(struct intel_pt_decoder *decoder)
{
	/* Sample located at the current ip, with no branch target */
	decoder->state.to_ip = 0;
	decoder->state.from_ip = decoder->ip;
	decoder->state.type = INTEL_PT_INSTRUCTION;

	/* Resampling is one-shot: the next decode step is a normal in-sync one */
	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;

	return 0;
}

/*
 * Return values of intel_pt_hop_trace(), telling intel_pt_walk_trace() how to
 * proceed with the current packet.
 */
/* Not handled by hop mode: fall through to the normal packet-type switch */
#define HOP_PROCESS	0
/* Nothing to report for this packet: continue the packet loop */
#define HOP_IGNORE	1
/* Finished: the caller returns its 'err' value */
#define HOP_RETURN	2
/* Jump back to the 'next' label and examine the current packet again */
#define HOP_AGAIN	3

/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
{
	switch (decoder->packet.type) {
	case INTEL_PT_TNT:
		return HOP_IGNORE;

	case INTEL_PT_TIP_PGD:
		if (!decoder->packet.count)
			return HOP_IGNORE;
		intel_pt_set_ip(decoder);
		decoder->state.type |= INTEL_PT_TRACE_END;
		decoder->state.from_ip = 0;
		decoder->state.to_ip = decoder->ip;
		return HOP_RETURN;

	case INTEL_PT_TIP:
		if (!decoder->packet.count)
			return HOP_IGNORE;
		intel_pt_set_ip(decoder);
		decoder->state.type = INTEL_PT_INSTRUCTION;
		decoder->state.from_ip = decoder->ip;
		decoder->state.to_ip = 0;
		return HOP_RETURN;

	case INTEL_PT_FUP:
		if (!decoder->packet.count)
			return HOP_IGNORE;
		intel_pt_set_ip(decoder);
		if (intel_pt_fup_event(decoder))
			return HOP_RETURN;
		if (!decoder->branch_enable)
			*no_tip = true;
		if (*no_tip) {
			decoder->state.type = INTEL_PT_INSTRUCTION;
			decoder->state.from_ip = decoder->ip;
			decoder->state.to_ip = 0;
			return HOP_RETURN;
		}
		*err = intel_pt_walk_fup_tip(decoder);
		if (!*err)
			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
		return HOP_RETURN;

	case INTEL_PT_PSB:
		decoder->last_ip = 0;
		decoder->have_last_ip = true;
		decoder->hop_psb_fup = false;
		*err = intel_pt_walk_psbend(decoder);
		if (*err == -EAGAIN)
			return HOP_AGAIN;
		if (*err)
			return HOP_RETURN;
		if (decoder->hop_psb_fup) {
			decoder->hop_psb_fup = false;
			decoder->state.type = INTEL_PT_INSTRUCTION;
			decoder->state.from_ip = decoder->ip;
			decoder->state.to_ip = 0;
			return HOP_RETURN;
		}
		if (decoder->cbr != decoder->cbr_seen) {
			decoder->state.type = 0;
			return HOP_RETURN;
		}
		return HOP_IGNORE;

	case INTEL_PT_BAD:
	case INTEL_PT_PAD:
	case INTEL_PT_TIP_PGE:
	case INTEL_PT_TSC:
	case INTEL_PT_TMA:
	case INTEL_PT_MODE_EXEC:
	case INTEL_PT_MODE_TSX:
	case INTEL_PT_MTC:
	case INTEL_PT_CYC:
	case INTEL_PT_VMCS:
	case INTEL_PT_PSBEND:
	case INTEL_PT_CBR:
	case INTEL_PT_TRACESTOP:
	case INTEL_PT_PIP:
	case INTEL_PT_OVF:
	case INTEL_PT_MNT:
	case INTEL_PT_PTWRITE:
	case INTEL_PT_PTWRITE_IP:
	case INTEL_PT_EXSTOP:
	case INTEL_PT_EXSTOP_IP:
	case INTEL_PT_MWAIT:
	case INTEL_PT_PWRE:
	case INTEL_PT_PWRX:
	case INTEL_PT_BBP:
	case INTEL_PT_BIP:
	case INTEL_PT_BEP:
	case INTEL_PT_BEP_IP:
	default:
		return HOP_PROCESS;
	}
}

static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
	bool no_tip = false;
@@ -1885,6 +2011,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
		if (err)
			return err;
next:
		if (decoder->hop) {
			switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
			case HOP_IGNORE:
				continue;
			case HOP_RETURN:
				return err;
			case HOP_AGAIN:
				goto next;
			default:
				break;
			}
		}

		switch (decoder->packet.type) {
		case INTEL_PT_TNT:
			if (!decoder->packet.count)
@@ -1914,6 +2053,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
			decoder->state.from_ip = 0;
			decoder->state.to_ip = decoder->ip;
			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
			/*
			 * In hop mode, resample to get the to_ip as an
			 * "instruction" sample.
			 */
			if (decoder->hop)
				decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
			return 0;
		}

@@ -2033,7 +2178,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)

		case INTEL_PT_MODE_TSX:
			/* MODE_TSX need not be followed by FUP */
			if (!decoder->pge) {
			if (!decoder->pge || decoder->in_psb) {
				intel_pt_update_in_tx(decoder);
				break;
			}
@@ -2424,6 +2569,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
	if (err)
		return err;

	/* In hop mode, resample to get the to_ip as an "instruction" sample */
	if (decoder->hop)
		decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
	else
		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
	decoder->overflow = false;

@@ -2545,6 +2694,13 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)

	if (decoder->ip) {
		decoder->state.type = 0; /* Do not have a sample */
		/*
		 * In hop mode, resample to get the PSB FUP ip as an
		 * "instruction" sample.
		 */
		if (decoder->hop)
			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
		else
			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
	} else {
		return intel_pt_sync_ip(decoder);
@@ -2609,6 +2765,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
			if (err == -EAGAIN)
				err = intel_pt_walk_trace(decoder);
			break;
		case INTEL_PT_STATE_RESAMPLE:
			err = intel_pt_resample(decoder);
			break;
		default:
			err = intel_pt_bug(decoder);
			break;
+1 −0
Original line number Diff line number Diff line
@@ -250,6 +250,7 @@ struct intel_pt_params {
	uint32_t tsc_ctc_ratio_n;
	uint32_t tsc_ctc_ratio_d;
	enum intel_pt_param_flags flags;
	unsigned int quick;
};

struct intel_pt_decoder;
+5 −1
Original line number Diff line number Diff line
@@ -1030,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
	params.quick = pt->synth_opts.quick;

	if (pt->filts.cnt > 0)
		params.pgd_ip = intel_pt_pgd_ip;
@@ -1423,6 +1424,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)

	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	if (pt->synth_opts.quick)
		sample.period = 1;
	else
		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;