Commit d240daa2 authored by Dave Airlie
Browse files

Merge tag 'drm-intel-gt-next-2023-03-16' of...

Merge tag 'drm-intel-gt-next-2023-03-16' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Driver Changes:

- Fix issue #6333: "list_add corruption" and full system lockup from
  performance monitoring (Janusz)
- Give the punit time to settle before fatally failing (Aravind, Chris)
- Don't use stolen memory or BAR for ring buffers on LLC platforms (John)
- Add missing ecodes and correct timeline seqno on GuC error captures (John)
- Make sure DSM size has correct 1MiB granularity on Gen12+ (Nirmoy,
  Lucas)
- Fix potential SSEU max_subslices array-index-out-of-bounds access on Gen11 (Andrea)
- Whitelist COMMON_SLICE_CHICKEN3 for UMD access on Gen12+ (Matt R.)
- Apply Wa_1408615072/Wa_1407596294 correctly on Gen11 (Matt R)
- Apply LNCF/LBCF workarounds correctly on XeHP SDV/PVC/DG2 (Matt R)
- Implement Wa_1606376872 for Xe_LP (Gustavo)
- Consider GSI offset when doing MCR lookups on Meteorlake+ (Matt R.)
- Add engine TLB invalidation for Meteorlake (Matt R.)
- Fix GSC Driver-FLR completion on Meteorlake (Alan)
- Fix GSC races on driver load/unload on Meteorlake+ (Daniele)
- Disable MC6 for MTL A step (Badal)

- Consolidate TLB invalidation flow (Tvrtko)
- Improve GuC/HuC debug messages (Michal Wa., John)
- Move fd_install after last use of fence (Rob)
- Initialize the obj flags for shmem objects (Aravind)
- Fix missing debug object activation (Nirmoy)
- Probe lmem before the stolen portion (Matt A)
- Improve clean up of GuC busyness stats worker (John)
- Fix missing return code checks in GuC submission init (John)
- Annotate two more workaround/tuning registers as MCR on PVC (Matt R)
- Fix GEN8_MISCCPCTL definition and remove unused INF_UNIT_LEVEL_CLKGATE (Lucas)
- Use sysfs_emit() and sysfs_emit_at() (Nirmoy)
- Make kobj_type structures constant (Thomas W.)
- Make kobj attributes const on gt/ (Jani)
- Remove the unused virtualized start hack on buddy allocator (Matt A)
- Remove redundant check for DG1 (Lucas)
- Move DG2 tuning to the right function (Lucas)
- Rename dev_priv to i915 for private data naming consistency in gt/ (Andi)
- Remove unnecessary whitelisting of CS_CTX_TIMESTAMP on Xe_HP platforms (Matt R.)

- Escape wildcard in method names in kerneldoc (Bagas)
- Selftest improvements (Chris, Jonathan, Tvrtko, Anshuman, Tejas)
- Fix sparse warnings (Jani)

[airlied: fix unused variable in intel_workarounds]
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZBMSb42yjjzczRhj@jlahtine-mobl.ger.corp.intel.com
parents c6265f5c d2a9692a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -909,7 +909,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
		if (WARN_ON(lmem_size < dsm_base))
			return ERR_PTR(-ENODEV);
		dsm_size = lmem_size - dsm_base;
		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
	}

	io_size = dsm_size;
+9 −10
Original line number Diff line number Diff line
@@ -108,31 +108,30 @@ struct tiled_blits {
	u32 height;
};

static bool supports_x_tiling(const struct drm_i915_private *i915)
static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915)
{
	int gen = GRAPHICS_VER(i915);

	/* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */
	drm_WARN_ON(&i915->drm, gen < 9);

	if (gen < 12)
		return true;

	if (!HAS_LMEM(i915) || IS_DG1(i915))
	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		return false;

	return true;
	return HAS_DISPLAY(i915);
}

static bool fast_blit_ok(const struct blit_buffer *buf)
{
	int gen = GRAPHICS_VER(buf->vma->vm->i915);

	if (gen < 9)
	/* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */
	if (GRAPHICS_VER(buf->vma->vm->i915) < 9)
		return false;

	if (gen < 12)
		return true;

	/* filter out platforms with unsupported X-tile support in fastblit */
	if (buf->tiling == CLIENT_TILING_X && !supports_x_tiling(buf->vma->vm->i915))
	if (buf->tiling == CLIENT_TILING_X && !fastblit_supports_x_tiling(buf->vma->vm->i915))
		return false;

	return true;
+128 −9
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
#include "gt/intel_gt_print.h"
#include "gt/intel_gt_regs.h"

#include "i915_cmd_parser.h"
@@ -1143,12 +1144,130 @@ static int init_status_page(struct intel_engine_cs *engine)
	return ret;
}

static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
{
	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
		[RENDER_CLASS].reg		= GEN8_RTCR,
		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
	};
	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
	};
	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
	};
	static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
		[VIDEO_DECODE_CLASS].reg	  = GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS].reg     = GEN12_VE_TLB_INV_CR,
		[OTHER_CLASS].reg		  = XELPMP_GSC_TLB_INV_CR,
	};
	struct drm_i915_private *i915 = engine->i915;
	const unsigned int instance = engine->instance;
	const unsigned int class = engine->class;
	const union intel_engine_tlb_inv_reg *regs;
	union intel_engine_tlb_inv_reg reg;
	unsigned int num = 0;
	u32 val;

	/*
	 * New platforms should not be added with catch-all-newer (>=)
	 * condition so that any later platform added triggers the below warning
	 * and in turn mandates a human cross-check of whether the invalidation
	 * flows have compatible semantics.
	 *
	 * For instance with the 11.00 -> 12.00 transition three out of five
	 * respective engine registers were moved to masked type. Then after the
	 * 12.00 -> 12.50 transition multi cast handling is required too.
	 */

	if (engine->gt->type == GT_MEDIA) {
		if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
			regs = xelpmp_regs;
			num = ARRAY_SIZE(xelpmp_regs);
		}
	} else {
		if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
			regs = xehp_regs;
			num = ARRAY_SIZE(xehp_regs);
		} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
			   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
			regs = gen12_regs;
			num = ARRAY_SIZE(gen12_regs);
		} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
			regs = gen8_regs;
			num = ARRAY_SIZE(gen8_regs);
		} else if (GRAPHICS_VER(i915) < 8) {
			return 0;
		}
	}

	if (gt_WARN_ONCE(engine->gt, !num,
			 "Platform does not implement TLB invalidation!"))
		return -ENODEV;

	if (gt_WARN_ON_ONCE(engine->gt,
			    class >= num ||
			    (!regs[class].reg.reg &&
			     !regs[class].mcr_reg.reg)))
		return -ERANGE;

	reg = regs[class];

	if (regs == xelpmp_regs && class == OTHER_CLASS) {
		/*
		 * There's only a single GSC instance, but it uses register bit
		 * 1 instead of either 0 or OTHER_GSC_INSTANCE.
		 */
		GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
		val = 1;
	} else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
		reg.reg = GEN8_M2TCR;
		val = 0;
	} else {
		val = instance;
	}

	val = BIT(val);

	engine->tlb_inv.mcr = regs == xehp_regs;
	engine->tlb_inv.reg = reg;
	engine->tlb_inv.done = val;

	if (GRAPHICS_VER(i915) >= 12 &&
	    (engine->class == VIDEO_DECODE_CLASS ||
	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
	     engine->class == COMPUTE_CLASS ||
	     engine->class == OTHER_CLASS))
		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
	else
		engine->tlb_inv.request = val;

	return 0;
}

static int engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	err = intel_engine_init_tlb_invalidation(engine);
	if (err)
		return err;

	err = init_status_page(engine);
	if (err)
		return err;
@@ -1939,13 +2058,13 @@ static const char *repr_timer(const struct timer_list *t)
static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct drm_i915_private *i915 = engine->i915;
	struct intel_engine_execlists * const execlists = &engine->execlists;
	u64 addr;

	if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(dev_priv, 4, 7))
	if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	if (HAS_EXECLISTS(dev_priv)) {
	if (HAS_EXECLISTS(i915)) {
		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
@@ -1966,7 +2085,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (GRAPHICS_VER(dev_priv) >= 6) {
	if (GRAPHICS_VER(i915) >= 6) {
		drm_printf(m, "\tRING_IMR:   0x%08x\n",
			   ENGINE_READ(engine, RING_IMR));
		drm_printf(m, "\tRING_ESR:   0x%08x\n",
@@ -1983,15 +2102,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (GRAPHICS_VER(dev_priv) >= 8)
	if (GRAPHICS_VER(i915) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (GRAPHICS_VER(dev_priv) >= 4)
	else if (GRAPHICS_VER(i915) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (GRAPHICS_VER(dev_priv) >= 4) {
	if (GRAPHICS_VER(i915) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
@@ -2001,7 +2120,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) {
	if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) {
		struct i915_request * const *port, *rq;
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2067,7 +2186,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
		}
		rcu_read_unlock();
		i915_sched_engine_active_unlock_bh(engine->sched_engine);
	} else if (GRAPHICS_VER(dev_priv) > 6) {
	} else if (GRAPHICS_VER(i915) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
+14 −0
Original line number Diff line number Diff line
@@ -341,6 +341,18 @@ struct intel_engine_guc_stats {
	u64 start_gt_clk;
};

/*
 * A per-engine TLB invalidation register, held either as a plain register
 * (@reg) or as a multicast register (@mcr_reg). The union itself does not
 * record which member is in use; struct intel_engine_tlb_inv carries a
 * separate flag for that.
 */
union intel_engine_tlb_inv_reg {
	i915_reg_t	reg;
	i915_mcr_reg_t	mcr_reg;
};

/*
 * Per-engine TLB invalidation parameters, filled in once by
 * intel_engine_init_tlb_invalidation().
 */
struct intel_engine_tlb_inv {
	/* True when @reg came from the Xe_HP table, i.e. holds an mcr_reg. */
	bool mcr;
	/* Invalidation register selected for the engine's class and instance. */
	union intel_engine_tlb_inv_reg reg;
	/*
	 * Request value: the @done bit, wrapped in _MASKED_BIT_ENABLE() for
	 * some engine classes on graphics version 12 and later.
	 */
	u32 request;
	/*
	 * Single bit identifying this engine within @reg.
	 * NOTE(review): presumably the bit polled for completion - confirm
	 * against the code that performs the invalidation.
	 */
	u32 done;
};

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
@@ -372,6 +384,8 @@ struct intel_engine_cs {
	u32 context_size;
	u32 mmio_base;

	struct intel_engine_tlb_inv tlb_inv;

	/*
	 * Some w/a require forcewake to be held (which prevents RC6) while
	 * a particular engine is active. If so, we set fw_domain to which
+1 −0
Original line number Diff line number Diff line
@@ -394,6 +394,7 @@
#define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
/*
 * Bit 21. NOTE(review): the extra indentation suggests this is a flag of
 * MI_CONDITIONAL_BATCH_BUFFER_END - confirm against the command reference.
 */
#define  MI_DO_COMPARE		REG_BIT(21)

#define STATE_BASE_ADDRESS \
	((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
Loading