Commit 62b48887 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ARC fixes from Vineet Gupta:
 "Nothing too exciting for now"

* tag 'arc-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  arc: use swap() to make code cleaner
  arc: perf: Move static structs to where they're really used
  ARC: perf: fix misleading comment about pmu vs counter stop
  arc: Replace lkml.org links with lore
  ARC: perf: Remove redundant initialization of variable idx
  ARC: thread_info.h: correct two typos in a comment
parents 57d17378 8f67f65d
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -50,8 +50,12 @@
 * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
 *
 * Noted at the time of Abilis Timer List corruption
 *	Orig Bug + Rejected solution	: https://lkml.org/lkml/2013/3/29/67
 *	Reasoning			: https://lkml.org/lkml/2013/4/8/15
 *
 * Orig Bug + Rejected solution:
 * https://lore.kernel.org/lkml/1364553218-31255-1-git-send-email-vgupta@synopsys.com
 *
 * Reasoning:
 * https://lore.kernel.org/lkml/CA+55aFyFWjpSVQM6M266tKrG_ZXJzZ-nYejpmXYQXbrr42mGPQ@mail.gmail.com
 *
 ******************************************************************/

+0 −162
Original line number Diff line number Diff line
@@ -63,166 +63,4 @@ struct arc_reg_cc_build {

#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 8)

/*
 * Some ARC pct quirks:
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ cache misses aren't counted by either of the two!
 */

/*
 * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
 * (based on a specific RTL build)
 * Below is the static map between perf generic/arc specific event_id and
 * h/w condition names.
 * At the time of probe, we loop thru each index and find it's name to
 * complete the mapping of perf event_id to h/w index as latter is needed
 * to program the counter really
 */
static const char * const arc_pmu_ev_hw_map[] = {
	/* count cycles */
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",

	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",

	/* counts condition */
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	/* All jump instructions that are taken */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
#ifdef CONFIG_ISA_ARCV2
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
#else
	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
#endif
	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */

	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */

	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
};

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff

static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_STC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
		},
			/* DTLB LD/ST Miss not segregated by h/w*/
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

#endif /* __ASM_PERF_EVENT_H */
+2 −2
Original line number Diff line number Diff line
@@ -99,8 +99,8 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)

/*
 * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
 * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a
 * syscall, so all that reamins to be tested is _TIF_WORK_MASK
 * SYSCALL_TRACE is anyway separately/unconditionally tested right after a
 * syscall, so all that remains to be tested is _TIF_WORK_MASK
 */

#endif /* _ASM_THREAD_INFO_H */
+164 −2
Original line number Diff line number Diff line
@@ -17,6 +17,168 @@
/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN	9

/*
 * Some ARC pct quirks:
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ cache misses aren't counted by either of the two!
 */

/*
 * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
 * (based on a specific RTL build)
 * Below is the static map between perf generic/arc specific event_id and
 * h/w condition names.
 * At the time of probe, we loop thru each index and find it's name to
 * complete the mapping of perf event_id to h/w index as latter is needed
 * to program the counter really
 */
static const char * const arc_pmu_ev_hw_map[] = {
	/* count cycles */
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",

	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",

	/* counts condition */
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	/* All jump instructions that are taken */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
#ifdef CONFIG_ISA_ARCV2
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
#else
	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
#endif
	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */

	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */

	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
};

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff

static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_STC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
		},
			/* DTLB LD/ST Miss not segregated by h/w*/
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

enum arc_pmu_attr_groups {
	ARCPMU_ATTR_GR_EVENTS,
	ARCPMU_ATTR_GR_FORMATS,
@@ -328,7 +490,7 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
	}

	if (!(event->hw.state & PERF_HES_STOPPED)) {
		/* stop ARC pmu here */
		/* stop hw counter here */
		write_aux_reg(ARC_REG_PCT_INDEX, idx);

		/* condition code #0 is always "never" */
@@ -361,7 +523,7 @@ static int arc_pmu_add(struct perf_event *event, int flags)
{
	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	int idx;

	idx = ffz(pmu_cpu->used_mask[0]);
	if (idx == arc_pmu->n_counters)
+3 −8
Original line number Diff line number Diff line
@@ -245,14 +245,9 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
{
	struct eh_frame_hdr_table_entry *e1 = p1;
	struct eh_frame_hdr_table_entry *e2 = p2;
	unsigned long v;

	v = e1->start;
	e1->start = e2->start;
	e2->start = v;
	v = e1->fde;
	e1->fde = e2->fde;
	e2->fde = v;

	swap(e1->start, e2->start);
	swap(e1->fde, e2->fde);
}

static void init_unwind_hdr(struct unwind_table *table,
Loading