Commit a5c3a3cb authored by Umesh Nerlige Ramappa's avatar Umesh Nerlige Ramappa Committed by John Harrison
Browse files

drm/i915/perf: Determine gen12 oa ctx offset at runtime



Some SKUs of same gen12 platform may have different oactxctrl
offsets. For gen12, determine oactxctrl offsets at runtime.

v2: (Lionel)
- Move MI definitions to intel_gpu_commands.h
- Ensure __find_reg_in_lri does read past context image size

v3: (Ashutosh)
- Drop unnecessary use of double underscores
- fix find_reg_in_lri
- Return error if oa context offset is U32_MAX
- Error out if oa_ctx_ctrl_offset does not find offset

v4: (Ashutosh)
- Warn on odd MI LRI_LEN
- Remove unnecessary check for valid_oactxctrl_offset
- Drop valid_oactxctrl_offset macro

v5: Drop unrelated comment

Signed-off-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: default avatarAshutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: default avatarJohn Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-5-umesh.nerlige.ramappa@intel.com
parent 2d9da585
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -187,6 +187,10 @@
#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/

#define MI_OPCODE(x)		(((x) >> 23) & 0x3f)
#define IS_MI_LRI_CMD(x)	(MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
#define MI_LRI_LEN(x)		(((x) & 0xff) + 1)

/*
 * 3D instructions used by the kernel
 */
+122 −24
Original line number Diff line number Diff line
@@ -1356,6 +1356,74 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
	return 0;
}

static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 idx = *offset;
	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
	bool found = false;

	idx++;
	for (; idx < len; idx += 2) {
		if (state[idx] == reg) {
			found = true;
			break;
		}
	}

	*offset = idx;
	return found;
}

static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
{
	u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
	u32 *state = ce->lrc_reg_state;

	for (offset = 0; offset < len; ) {
		if (IS_MI_LRI_CMD(state[offset])) {
			/*
			 * We expect reg-value pairs in MI_LRI command, so
			 * MI_LRI_LEN() should be even, if not, issue a warning.
			 */
			drm_WARN_ON(&ce->engine->i915->drm,
				    MI_LRI_LEN(state[offset]) & 0x1);

			if (oa_find_reg_in_lri(state, reg, &offset, len))
				break;
		} else {
			offset++;
		}
	}

	return offset < len ? offset : U32_MAX;
}

static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
{
	i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
	struct i915_perf *perf = &ce->engine->i915->perf;
	u32 offset = perf->ctx_oactxctrl_offset;

	/* Do this only once. Failure is stored as offset of U32_MAX */
	if (offset)
		goto exit;

	offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
	perf->ctx_oactxctrl_offset = offset;

	drm_dbg(&ce->engine->i915->drm,
		"%s oa ctx control at 0x%08x dword offset\n",
		ce->engine->name, offset);

exit:
	return offset && offset != U32_MAX ? 0 : -ENODEV;
}

static bool engine_supports_mi_query(struct intel_engine_cs *engine)
{
	return engine->class == RENDER_CLASS;
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
@@ -1375,6 +1443,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	if (engine_supports_mi_query(stream->engine)) {
		/*
		 * We are enabling perf query here. If we don't find the context
		 * offset here, just return an error.
		 */
		ret = set_oa_ctx_ctrl_offset(ce);
		if (ret) {
			intel_context_unpin(ce);
			drm_err(&stream->perf->i915->drm,
				"Enabling perf query failed for %s\n",
				stream->engine->name);
			return ret;
		}
	}

	switch (GRAPHICS_VER(ce->engine->i915)) {
	case 7: {
		/*
@@ -2406,10 +2489,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
	int err;
	struct intel_context *ce = stream->pinned_ctx;
	u32 format = stream->oa_buffer.format;
	u32 offset = stream->perf->ctx_oactxctrl_offset;
	struct flex regs_context[] = {
		{
			GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
			offset + 1,
			active ? GEN8_OA_COUNTER_RESUME : 0,
		},
	};
@@ -2439,7 +2523,8 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
	if (err)
		return err;

	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
	err = gen8_modify_context(ce, regs_context,
				  ARRAY_SIZE(regs_context));
	intel_context_unlock_pinned(ce);
	if (err)
		return err;
@@ -2564,6 +2649,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
			   const struct i915_oa_config *oa_config,
			   struct i915_active *active)
{
	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
@@ -2574,7 +2660,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
		},
		{
			GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
			ctx_oactxctrl + 1,
		},
		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -4543,6 +4629,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
	}
}

static void i915_perf_init_info(struct drm_i915_private *i915)
{
	struct i915_perf *perf = &i915->perf;

	switch (GRAPHICS_VER(i915)) {
	case 8:
		perf->ctx_oactxctrl_offset = 0x120;
		perf->ctx_flexeu0_offset = 0x2ce;
		perf->gen8_valid_ctx_bit = BIT(25);
		break;
	case 9:
		perf->ctx_oactxctrl_offset = 0x128;
		perf->ctx_flexeu0_offset = 0x3de;
		perf->gen8_valid_ctx_bit = BIT(16);
		break;
	case 11:
		perf->ctx_oactxctrl_offset = 0x124;
		perf->ctx_flexeu0_offset = 0x78e;
		perf->gen8_valid_ctx_bit = BIT(16);
		break;
	case 12:
		/*
		 * Calculate offset at runtime in oa_pin_context for gen12 and
		 * cache the value in perf->ctx_oactxctrl_offset.
		 */
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(i915));
	}
}

/**
 * i915_perf_init - initialize i915-perf state on module bind
 * @i915: i915 device instance
@@ -4581,6 +4698,7 @@ void i915_perf_init(struct drm_i915_private *i915)
		 * execlist mode by default.
		 */
		perf->ops.read = gen8_oa_read;
		i915_perf_init_info(i915);

		if (IS_GRAPHICS_VER(i915, 8, 9)) {
			perf->ops.is_valid_b_counter_reg =
@@ -4600,18 +4718,6 @@ void i915_perf_init(struct drm_i915_private *i915)
			perf->ops.enable_metric_set = gen8_enable_metric_set;
			perf->ops.disable_metric_set = gen8_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

			if (GRAPHICS_VER(i915) == 8) {
				perf->ctx_oactxctrl_offset = 0x120;
				perf->ctx_flexeu0_offset = 0x2ce;

				perf->gen8_valid_ctx_bit = BIT(25);
			} else {
				perf->ctx_oactxctrl_offset = 0x128;
				perf->ctx_flexeu0_offset = 0x3de;

				perf->gen8_valid_ctx_bit = BIT(16);
			}
		} else if (GRAPHICS_VER(i915) == 11) {
			perf->ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
@@ -4625,11 +4731,6 @@ void i915_perf_init(struct drm_i915_private *i915)
			perf->ops.enable_metric_set = gen8_enable_metric_set;
			perf->ops.disable_metric_set = gen11_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

			perf->ctx_oactxctrl_offset = 0x124;
			perf->ctx_flexeu0_offset = 0x78e;

			perf->gen8_valid_ctx_bit = BIT(16);
		} else if (GRAPHICS_VER(i915) == 12) {
			perf->ops.is_valid_b_counter_reg =
				gen12_is_valid_b_counter_addr;
@@ -4643,9 +4744,6 @@ void i915_perf_init(struct drm_i915_private *i915)
			perf->ops.enable_metric_set = gen12_enable_metric_set;
			perf->ops.disable_metric_set = gen12_disable_metric_set;
			perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;

			perf->ctx_flexeu0_offset = 0;
			perf->ctx_oactxctrl_offset = 0x144;
		}
	}

+1 −1
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@
#define  GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
#define  GEN12_OAR_OACONTROL_COUNTER_ENABLE       (1 << 0)

#define GEN12_OACTXCONTROL _MMIO(0x2360)
#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
#define GEN12_OAR_OASTATUS _MMIO(0x2968)

/* Gen12 OAG unit */