Commit 1958b0f9 authored by Dave Airlie
Browse files

Merge tag 'drm-intel-fixes-2023-08-03' of...

Merge tag 'drm-intel-fixes-2023-08-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Fix bug in getting msg length in AUX CH registers handler [gvt] (Yan Zhao)
- Gen12 AUX invalidation fixes [gt] (Andi Shyti, Jonathan Cavitt)
- Fix premature release of request's reusable memory (Janusz Krzysztofik)

- Merge tag 'gvt-fixes-2023-08-02' of https://github.com/intel/gvt-linux into drm-intel-fixes (Tvrtko Ursulin)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZMtkxWGuUKpaRMmo@tursulin-desk
parents 062ff85b 0bc057ea
Loading
Loading
Loading
Loading
+92 −48
Original line number Diff line number Diff line
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
	return MI_ARB_CHECK | 1 << 8 | state;
}

u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
/*
 * Map an engine to its AUX table invalidation register.
 *
 * Only the engine ids listed in the switch have an entry. Any other engine
 * id yields INVALID_MMIO_REG, so the result must be checked with
 * i915_mmio_reg_valid() before it is used as a register offset.
 */
static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
	switch (engine->id) {
	case RCS0:
		return GEN12_CCS_AUX_INV;
	case BCS0:
		return GEN12_BCS0_AUX_INV;
	case VCS0:
		return GEN12_VD0_AUX_INV;
	case VCS2:
		return GEN12_VD2_AUX_INV;
	case VECS0:
		return GEN12_VE0_AUX_INV;
	case CCS0:
		return GEN12_CCS0_AUX_INV;
	default:
		/* No AUX invalidation register known for this engine. */
		return INVALID_MMIO_REG;
	}
}

/*
 * Tell whether @engine has to invalidate the AUX table.
 *
 * Returns false on Ponte Vecchio, false on any flat-CCS platform, and
 * otherwise true only if the engine has a valid AUX invalidation register.
 */
static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
{
	if (IS_PONTEVECCHIO(engine->i915))
		return false;

	/*
	 * So far platforms supported by i915 having flat ccs do not require
	 * AUX invalidation.
	 */
	if (HAS_FLAT_CCS(engine->i915))
		return false;

	/* Finally, the engine itself must have an invalidation register. */
	return i915_mmio_reg_valid(gen12_get_aux_inv_reg(engine));
}

/*
 * Emit the AUX table invalidation sequence for @engine into the command
 * stream at @cs.
 *
 * If the engine does not need AUX invalidation (see
 * gen12_needs_ccs_aux_inv()) nothing is written and @cs is returned
 * unchanged. Otherwise exactly 8 dwords are written: a 3-dword
 * MI_LOAD_REGISTER_IMM that sets AUX_INV in the engine's invalidation
 * register, followed by a 5-dword MI_SEMAPHORE_WAIT_TOKEN that polls the
 * same register. Callers in this file reserve 8 dwords of ring space for
 * this sequence, so the count here must stay at 8.
 *
 * Returns the command stream pointer advanced past the emitted dwords.
 */
u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
{
	i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
	u32 gsi_offset = engine->gt->uncore->gsi_offset;

	if (!gen12_needs_ccs_aux_inv(engine))
		return cs;

	/* Kick the invalidation: write AUX_INV to the register (3 dwords). */
	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = AUX_INV;

	/*
	 * Poll the same register in register-poll mode with an "equal"
	 * comparison against a data dword of 0 (5 dwords).
	 * NOTE(review): presumably this waits for the hardware to clear
	 * AUX_INV once the invalidation completes - confirm against the
	 * MI_SEMAPHORE_WAIT command description.
	 */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_REGISTER_POLL |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	if (mode & EMIT_FLUSH) {
		u32 flags = 0;
	/*
	 * On Aux CCS platforms the invalidation of the Aux
	 * table requires quiescing memory traffic beforehand
	 */
	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
		u32 bit_group_0 = 0;
		u32 bit_group_1 = 0;
		int err;
		u32 *cs;

@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
		if (err)
			return err;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_FLUSH_L3;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;

		/*
		 * When required, in MTL and beyond platforms we
		 * need to set the CCS_FLUSH bit in the pipe control
		 */
		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;

		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		flags |= PIPE_CONTROL_DEPTH_STALL;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
		bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;
		bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
		bit_group_1 |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;
		bit_group_1 |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
			bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
			bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs,
					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
					     flags, LRC_PPHWSP_SCRATCH_ADDR);
		cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		if (!HAS_FLAT_CCS(rq->engine->i915))
			count = 8 + 4;
		else
		count = 8;
		if (gen12_needs_ccs_aux_inv(rq->engine))
			count += 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		if (!HAS_FLAT_CCS(rq->engine->i915)) {
			/* hsdes: 1809175790 */
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_GFX_CCS_AUX_NV);
		}
		cs = gen12_emit_aux_table_inv(engine, cs);

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)

int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	intel_engine_mask_t aux_inv = 0;
	u32 cmd, *cs;
	u32 cmd = 4;
	u32 *cs;

	cmd = 4;
	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (!HAS_FLAT_CCS(rq->engine->i915) &&
		    (rq->engine->class == VIDEO_DECODE_CLASS ||
		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
			aux_inv = rq->engine->mask &
				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
			if (aux_inv)
				cmd += 4;
		}
		if (gen12_needs_ccs_aux_inv(rq->engine))
			cmd += 8;
	}

	cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;

		if (gen12_needs_ccs_aux_inv(rq->engine) &&
		    rq->engine->class == COPY_ENGINE_CLASS)
			cmd |= MI_FLUSH_DW_CCS;
	}

	*cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	if (aux_inv) { /* hsdes: 1809175790 */
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_VD0_AUX_NV);
		else
			cs = gen12_emit_aux_table_inv(rq->engine->gt,
						      cs, GEN12_VE0_AUX_NV);
	}
	cs = gen12_emit_aux_table_inv(rq->engine, cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);
+13 −8
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"

struct intel_engine_cs;
struct intel_gt;
struct i915_request;

@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);

u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);

/*
 * Write one 6-dword PIPE_CONTROL command at @batch.
 *
 * All six dwords are zeroed first; @bit_group_0 is OR-ed into the command
 * header dword, @bit_group_1 is stored in dword 1 and @offset in dword 2.
 * Dwords 3-5 are left as zero.
 *
 * Returns the pointer just past the emitted command.
 */
static inline u32 *
__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
			 u32 bit_group_1, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
	batch[1] = bit_group_1;
	batch[2] = offset;

	return batch + 6;
}

/*
 * Emit a PIPE_CONTROL with no extra bits in the header dword: only
 * @bit_group_1 (dword 1) and @offset (dword 2) are supplied.
 *
 * Returns the pointer just past the emitted command.
 */
static inline u32 *gen8_emit_pipe_control(u32 *batch,
					  u32 bit_group_1, u32 offset)
{
	return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
}

/*
 * Emit a PIPE_CONTROL passing both bit groups through: @bit_group_0 goes
 * into the header dword and @bit_group_1 into dword 1, with @offset in
 * dword 2.
 *
 * Returns the pointer just past the emitted command.
 */
static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
					   u32 bit_group_1, u32 offset)
{
	return __gen8_emit_pipe_control(batch, bit_group_0,
					bit_group_1, offset);
}

static inline u32 *
+2 −0
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@
#define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN	MI_INSTR(0x1c, 3) /* GEN12+ */
#define   MI_SEMAPHORE_REGISTER_POLL	(1 << 16)
#define   MI_SEMAPHORE_POLL		(1 << 15)
#define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
#define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
@@ -299,6 +300,7 @@
#define   PIPE_CONTROL_QW_WRITE				(1<<14)
#define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
#define   PIPE_CONTROL_CCS_FLUSH			(1<<13) /* MTL+ */
#define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */
+8 −8
Original line number Diff line number Diff line
@@ -332,9 +332,11 @@
#define GEN8_PRIVATE_PAT_HI			_MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index)			_MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7			_MMIO(0x4180)
#define GEN12_GFX_CCS_AUX_NV			_MMIO(0x4208)
#define GEN12_VD0_AUX_NV			_MMIO(0x4218)
#define GEN12_VD1_AUX_NV			_MMIO(0x4228)

#define GEN12_CCS_AUX_INV			_MMIO(0x4208)
#define GEN12_VD0_AUX_INV			_MMIO(0x4218)
#define GEN12_VE0_AUX_INV			_MMIO(0x4238)
#define GEN12_BCS0_AUX_INV			_MMIO(0x4248)

#define GEN8_RTCR				_MMIO(0x4260)
#define GEN8_M1TCR				_MMIO(0x4264)
@@ -342,14 +344,12 @@
#define GEN8_BTCR				_MMIO(0x426c)
#define GEN8_VTCR				_MMIO(0x4270)

#define GEN12_VD2_AUX_NV			_MMIO(0x4298)
#define GEN12_VD3_AUX_NV			_MMIO(0x42a8)
#define GEN12_VE0_AUX_NV			_MMIO(0x4238)

#define BLT_HWS_PGA_GEN7			_MMIO(0x4280)

#define GEN12_VE1_AUX_NV			_MMIO(0x42b8)
#define GEN12_VD2_AUX_INV			_MMIO(0x4298)
#define GEN12_CCS0_AUX_INV			_MMIO(0x42c8)
#define   AUX_INV				REG_BIT(0)

#define VEBOX_HWS_PGA_GEN7			_MMIO(0x4380)

#define GEN12_AUX_ERR_DBG			_MMIO(0x43f4)
+2 −15
Original line number Diff line number Diff line
@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
	    IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	/* hsdes: 1809175790 */
	if (!HAS_FLAT_CCS(ce->engine->i915))
		cs = gen12_emit_aux_table_inv(ce->engine->gt,
					      cs, GEN12_GFX_CCS_AUX_NV);
	cs = gen12_emit_aux_table_inv(ce->engine, cs);

	/* Wa_16014892111 */
	if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	/* hsdes: 1809175790 */
	if (!HAS_FLAT_CCS(ce->engine->i915)) {
		if (ce->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(ce->engine->gt,
						      cs, GEN12_VD0_AUX_NV);
		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
			cs = gen12_emit_aux_table_inv(ce->engine->gt,
						      cs, GEN12_VE0_AUX_NV);
	}

	return cs;
	return gen12_emit_aux_table_inv(ce->engine, cs);
}

static void
Loading